diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cdd5404fcf..fb0154b969 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -546,3 +546,233 @@ jobs: - name: Test run: ../../../b2 toolset=$TOOLSET ${{ matrix.suite }} define=CI_SUPPRESS_KNOWN_ISSUES define=SLOW_COMPILER define=BOOST_MATH_STANDALONE define=BOOST_MP_STANDALONE working-directory: ../boost-root/libs/math/test + + posix-cmake-test: + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-22.04 + + runs-on: ${{matrix.os}} + + steps: + - uses: actions/checkout@v4 + + - name: Install packages + if: matrix.install + run: sudo apt install ${{matrix.install}} libgmp-dev libmpfr-dev libfftw3-dev + + - name: Setup Boost + run: | + echo GITHUB_REPOSITORY: $GITHUB_REPOSITORY + LIBRARY=${GITHUB_REPOSITORY#*/} + echo LIBRARY: $LIBRARY + echo "LIBRARY=$LIBRARY" >> $GITHUB_ENV + echo GITHUB_BASE_REF: $GITHUB_BASE_REF + echo GITHUB_REF: $GITHUB_REF + REF=${GITHUB_BASE_REF:-$GITHUB_REF} + REF=${REF#refs/heads/} + echo REF: $REF + BOOST_BRANCH=develop && [ "$REF" == "master" ] && BOOST_BRANCH=master || true + echo BOOST_BRANCH: $BOOST_BRANCH + cd .. + git clone -b $BOOST_BRANCH --depth 1 https://github.com/boostorg/boost.git boost-root + cd boost-root + mkdir -p libs/$LIBRARY + cp -r $GITHUB_WORKSPACE/* libs/$LIBRARY + git submodule update --init tools/boostdep + python tools/boostdep/depinst/depinst.py --git_args "--jobs 3" $LIBRARY + + - name: Configure + run: | + cd ../boost-root + mkdir __build__ && cd __build__ + cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON .. + + - name: Build tests + run: | + cd ../boost-root/__build__ + cmake --build . --target tests + + sycl-cmake-test: + strategy: + fail-fast: false + + runs-on: ubuntu-latest + + steps: + - name: Intel Apt repository + timeout-minutes: 1 + run: | + wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB + sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB + rm GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB + echo "deb https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list + sudo apt-get update + + - name: Install Intel oneAPI compilers + timeout-minutes: 5 + run: sudo apt-get install intel-oneapi-compiler-fortran intel-oneapi-compiler-dpcpp-cpp + + - name: Setup Intel oneAPI environment + run: | + source /opt/intel/oneapi/setvars.sh + printenv >> $GITHUB_ENV + + - name: checkout project code + uses: actions/checkout@v4 + + - name: Install Packages + run: | + sudo apt-get install -y cmake make + + - name: Setup Boost + run: | + echo GITHUB_REPOSITORY: $GITHUB_REPOSITORY + LIBRARY=${GITHUB_REPOSITORY#*/} + echo LIBRARY: $LIBRARY + echo "LIBRARY=$LIBRARY" >> $GITHUB_ENV + echo GITHUB_BASE_REF: $GITHUB_BASE_REF + echo GITHUB_REF: $GITHUB_REF + REF=${GITHUB_BASE_REF:-$GITHUB_REF} + REF=${REF#refs/heads/} + echo REF: $REF + BOOST_BRANCH=develop && [ "$REF" == "master" ] && BOOST_BRANCH=master || true + echo BOOST_BRANCH: $BOOST_BRANCH + cd .. + git clone -b $BOOST_BRANCH --depth 1 https://github.com/boostorg/boost.git boost-root + cd boost-root + mkdir -p libs/$LIBRARY + cp -r $GITHUB_WORKSPACE/* libs/$LIBRARY + git submodule update --init tools/boostdep + python3 tools/boostdep/depinst/depinst.py --git_args "--jobs 3" $LIBRARY + - name: Configure + run: | + cd ../boost-root + mkdir __build__ && cd __build__ + cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DBOOST_MATH_ENABLE_SYCL=ON .. 
+ - name: Build tests
+ run: |
+ cd ../boost-root/__build__
+ cmake --build . --target tests -j $(nproc)
+ - name: Run tests
+ run: |
+ cd ../boost-root/__build__
+ ctest --output-on-failure --no-tests=error
+ cuda-cmake-test:
+ strategy:
+ fail-fast: false
+
+ runs-on: ubuntu-22.04
+
+ steps:
+ - uses: Jimver/cuda-toolkit@v0.2.16
+ id: cuda-toolkit
+ with:
+ cuda: '12.5.0'
+ method: 'network'
+ sub-packages: '["nvcc"]'
+
+ - name: Output CUDA information
+ run: |
+ echo "Installed cuda version is: ${{steps.cuda-toolkit.outputs.cuda}}"
+ echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
+ nvcc -V
+ - uses: actions/checkout@v4
+
+ - name: Install Packages
+ run: |
+ sudo apt-get install -y cmake make
+ - name: Setup Boost
+ run: |
+ echo GITHUB_REPOSITORY: $GITHUB_REPOSITORY
+ LIBRARY=${GITHUB_REPOSITORY#*/}
+ echo LIBRARY: $LIBRARY
+ echo "LIBRARY=$LIBRARY" >> $GITHUB_ENV
+ echo GITHUB_BASE_REF: $GITHUB_BASE_REF
+ echo GITHUB_REF: $GITHUB_REF
+ REF=${GITHUB_BASE_REF:-$GITHUB_REF}
+ REF=${REF#refs/heads/}
+ echo REF: $REF
+ BOOST_BRANCH=develop && [ "$REF" == "master" ] && BOOST_BRANCH=master || true
+ echo BOOST_BRANCH: $BOOST_BRANCH
+ cd ..
+ git clone -b $BOOST_BRANCH --depth 1 https://github.com/boostorg/boost.git boost-root
+ cd boost-root
+ mkdir -p libs/$LIBRARY
+ cp -r $GITHUB_WORKSPACE/* libs/$LIBRARY
+ git submodule update --init tools/boostdep
+ python3 tools/boostdep/depinst/depinst.py --git_args "--jobs 3" $LIBRARY
+ - name: Configure
+ run: |
+ cd ../boost-root
+ mkdir __build__ && cd __build__
+ cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_MATH_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES=70 -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.5 ..
+ - name: Build tests
+ run: |
+ cd ../boost-root/__build__
+ cmake --build . --target tests -j $(nproc)
+ # Will leave this commented out for now. GHA does not install graphics cards by default
+ #- name: Run tests
+ # run: |
+ # cd ../boost-root/__build__
+ # ctest --output-on-failure --no-tests=error
+ nvrtc-cmake-test:
+ strategy:
+ fail-fast: false
+
+ runs-on: ubuntu-22.04
+
+ steps:
+ - uses: Jimver/cuda-toolkit@v0.2.16
+ id: cuda-toolkit
+ with:
+ cuda: '12.5.0'
+ method: 'network'
+
+ - name: Output CUDA information
+ run: |
+ echo "Installed cuda version is: ${{steps.cuda-toolkit.outputs.cuda}}"
+ echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
+ nvcc -V
+ - uses: actions/checkout@v4
+
+ - name: Install Packages
+ run: |
+ sudo apt-get install -y cmake make
+ - name: Setup Boost
+ run: |
+ echo GITHUB_REPOSITORY: $GITHUB_REPOSITORY
+ LIBRARY=${GITHUB_REPOSITORY#*/}
+ echo LIBRARY: $LIBRARY
+ echo "LIBRARY=$LIBRARY" >> $GITHUB_ENV
+ echo GITHUB_BASE_REF: $GITHUB_BASE_REF
+ echo GITHUB_REF: $GITHUB_REF
+ REF=${GITHUB_BASE_REF:-$GITHUB_REF}
+ REF=${REF#refs/heads/}
+ echo REF: $REF
+ BOOST_BRANCH=develop && [ "$REF" == "master" ] && BOOST_BRANCH=master || true
+ echo BOOST_BRANCH: $BOOST_BRANCH
+ cd ..
+ git clone -b $BOOST_BRANCH --depth 1 https://github.com/boostorg/boost.git boost-root
+ cd boost-root
+ mkdir -p libs/$LIBRARY
+ cp -r $GITHUB_WORKSPACE/* libs/$LIBRARY
+ git submodule update --init tools/boostdep
+ python3 tools/boostdep/depinst/depinst.py --git_args "--jobs 3" $LIBRARY
+ - name: Configure
+ run: |
+ cd ../boost-root
+ mkdir __build__ && cd __build__
+ cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_MATH_ENABLE_NVRTC=1 -DCMAKE_CUDA_ARCHITECTURES=70 -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.5 -DBOOST_MATH_NVRTC_CI_RUN=1 ..
+ pwd
+ - name: Build tests
+ run: |
+ cd ../boost-root/__build__
+ cmake --build . --target tests -j $(nproc)
+ # We don't have the ability to run these tests at runtime right now
+ #- name: Run tests
+ # run: |
+ # cd ../boost-root/__build__
+ # ctest --output-on-failure --no-tests=error
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7e7790271c..7965bd1ea9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,13 +5,17 @@ cmake_minimum_required(VERSION 3.5...3.16)
-project(boost_math VERSION "${BOOST_SUPERPROJECT_VERSION}" LANGUAGES CXX)
+project(boost_math VERSION 1.87.0 LANGUAGES CXX)
add_library(boost_math INTERFACE)
add_library(Boost::math ALIAS boost_math)
target_include_directories(boost_math INTERFACE include)
+if(NOT CMAKE_VERSION VERSION_LESS "3.19")
+ file(GLOB_RECURSE headers include/*.hpp)
+ target_sources(boost_math PRIVATE ${headers})
+endif()
include(CMakeDependentOption)
@@ -41,12 +45,17 @@ else()
endif()
+if(BUILD_TESTING AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/CMakeLists.txt")
+
+ add_subdirectory(test)
+
# Only enable tests when we're the root project
-if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
+elseif(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
include(CTest)
add_subdirectory(test)
include(GNUInstallDirs)
install(DIRECTORY "include/" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
+
endif()
diff --git a/build.jam b/build.jam
new file mode 100644
index 0000000000..fb244e511d
--- /dev/null
+++ b/build.jam
@@ -0,0 +1,48 @@
+# Copyright René Ferdinand Rivera Morell 2023-2024
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at +# http://www.boost.org/LICENSE_1_0.txt) + +require-b2 5.2 ; + +constant boost_dependencies : + /boost/assert//boost_assert + /boost/concept_check//boost_concept_check + /boost/config//boost_config + /boost/core//boost_core + /boost/integer//boost_integer + /boost/lexical_cast//boost_lexical_cast + /boost/predef//boost_predef + /boost/random//boost_random + /boost/static_assert//boost_static_assert + /boost/throw_exception//boost_throw_exception ; + +project /boost/math + : common-requirements + include + ; + +explicit + [ alias boost_math : : : : $(boost_dependencies) ] + [ alias boost_math_c99 : build//boost_math_c99 ] + [ alias boost_math_c99f : build//boost_math_c99f ] + [ alias boost_math_c99l : build//boost_math_c99l ] + [ alias boost_math_tr1 : build//boost_math_tr1 ] + [ alias boost_math_tr1f : build//boost_math_tr1f ] + [ alias boost_math_tr1l : build//boost_math_tr1l ] + [ alias all : + boost_math + boost_math_c99 boost_math_c99f boost_math_c99l + boost_math_tr1 boost_math_tr1f boost_math_tr1l + example test ] + [ alias testing : : : : + test + include_private ] + ; + +call-if : boost-library math + : install boost_math + boost_math_c99 boost_math_c99f boost_math_c99l + boost_math_tr1 boost_math_tr1f boost_math_tr1l + ; + diff --git a/build/Jamfile.v2 b/build/Jamfile.v2 index 6549e06b79..500c77363d 100644 --- a/build/Jamfile.v2 +++ b/build/Jamfile.v2 @@ -1,16 +1,18 @@ # copyright John Maddock 2008 -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or copy at +# Distributed under the Boost Software License, Version 1.0. +# (See accompanying file LICENSE_1_0.txt or copy at # http://www.boost.org/LICENSE_1_0.txt. import testing ; import pch ; -import ../../config/checks/config : requires ; +import-search /boost/config/checks ; +import config : requires ; -project - : requirements - intel-win:-nologo - intel-win:-nologo +project + : common-requirements $(boost_dependencies) + : requirements + intel-win:-nologo + intel-win:-nologo #intel-linux:off intel-darwin:off gcc,windows:off @@ -20,9 +22,11 @@ project [ check-target-builds ../config//has_gcc_visibility "gcc visibility" : gcc:-fvisibility=hidden : ] [ requires cxx11_noexcept cxx11_rvalue_references sfinae_expr cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_hdr_tuple cxx11_hdr_initializer_list cxx11_hdr_chrono cxx11_thread_local cxx11_constexpr cxx11_nullptr cxx11_numeric_limits cxx11_decltype cxx11_hdr_array cxx11_hdr_atomic cxx11_hdr_type_traits cxx11_allocator cxx11_explicit_conversion_operators ] [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction ] + : usage-requirements + BOOST_MATH_TR1_NO_LIB=1 ; -cpp-pch pch : ../src/tr1/pch.hpp : ../src/tr1 shared:BOOST_MATH_TR1_DYN_LINK=1 ; +cpp-pch pch : ../src/tr1/pch.hpp : ../src/tr1 shared:BOOST_MATH_TR1_DYN_LINK=1 ; C99_SOURCES = acosh asinh @@ -46,7 +50,7 @@ round tgamma trunc ; -TR1_SOURCES = +TR1_SOURCES = assoc_laguerre assoc_legendre beta @@ -80,22 +84,22 @@ import targets ; obj long_double_check : ../config/has_long_double_support.cpp ; explicit long_double_check ; - + # Library targets lib boost_math_tr1 : ../src/tr1/$(TR1_SOURCES).cpp pch - : + : shared:BOOST_MATH_TR1_DYN_LINK=1 ../src/tr1 ; lib boost_math_tr1f : ../src/tr1/$(TR1_SOURCES)f.cpp pch - : + : shared:BOOST_MATH_TR1_DYN_LINK=1 ../src/tr1 ; lib 
boost_math_tr1l : ../src/tr1/$(TR1_SOURCES)l.cpp pch - : + : shared:BOOST_MATH_TR1_DYN_LINK=1 ../config//has_long_double_support ../src/tr1 @@ -103,23 +107,21 @@ lib boost_math_tr1l : ../src/tr1/$(TR1_SOURCES)l.cpp pch ; lib boost_math_c99 : ../src/tr1/$(C99_SOURCES).cpp pch - : + : shared:BOOST_MATH_TR1_DYN_LINK=1 ../src/tr1 ; lib boost_math_c99f : ../src/tr1/$(C99_SOURCES)f.cpp pch - : + : shared:BOOST_MATH_TR1_DYN_LINK=1 ../src/tr1 ; lib boost_math_c99l : ../src/tr1/$(C99_SOURCES)l.cpp pch - : + : shared:BOOST_MATH_TR1_DYN_LINK=1 ../config//has_long_double_support ../src/tr1 [ check-target-builds ../config//has_long_double_support "long double support" : : no ] ; - -boost-install boost_math_c99 boost_math_c99f boost_math_c99l boost_math_tr1 boost_math_tr1f boost_math_tr1l ; diff --git a/config/Jamfile.v2 b/config/Jamfile.v2 index 77aca7c2e3..650e888809 100644 --- a/config/Jamfile.v2 +++ b/config/Jamfile.v2 @@ -9,11 +9,11 @@ import path ; local ntl-path = [ modules.peek : NTL_PATH ] ; local gmp_path = [ modules.peek : GMP_PATH ] ; -lib quadmath ; -lib fftw3 ; -lib fftw3f ; -lib fftw3l ; -lib fftw3q ; +searched-lib quadmath ; +searched-lib fftw3 ; +searched-lib fftw3f ; +searched-lib fftw3l ; +searched-lib fftw3q ; obj has_long_double_support : has_long_double_support.cpp ; obj has_mpfr_class : has_mpfr_class.cpp : diff --git a/doc/Jamfile.v2 b/doc/Jamfile.v2 index 71746726f5..511262d493 100644 --- a/doc/Jamfile.v2 +++ b/doc/Jamfile.v2 @@ -13,7 +13,7 @@ path-constant here : . ; constant here-url : [ regex.replace $(here) "\\\\" "/" ] ; xml math : math.qbk : - enable_index + enable_index __base_path__=$(here-url) pdf:__build_pdf html:__build_html @@ -25,7 +25,7 @@ boostbook standalone # Path for links to Boost: boost.root=../../../.. html.stylesheet=math.css - + # Some general style settings: table.footnote.number.format=1 footnote.number.format=1 @@ -46,11 +46,11 @@ boostbook standalone # Index on type: index.on.type=1 boost.noexpand.chapter.toc=1 - + #root.filename="sf_dist_and_tools" #graphicsize.extension=1 #use.extensions=1 - + # PDF Options: # TOC Generation: this is needed for FOP-0.9 and later: fop1.extensions=0 @@ -74,7 +74,7 @@ boostbook standalone # better use SVG's instead: pdf:admon.graphics.extension=".svg" pdf:admon.graphics.path=$(here)/../../../doc/src/images/ - pdf:use.role.for.mediaobject=1 + pdf:use.role.for.mediaobject=1 pdf:preferred.mediaobject.role=print pdf:img.src.path=$(images_location)/ pdf:draft.mode="no" @@ -82,7 +82,7 @@ boostbook standalone on pdf:off html:on $(here)/index.idx - $(here)/../../.. + $(here)/../include #on pdf:index.on.type=1 ; @@ -90,7 +90,7 @@ boostbook standalone install pdfinstall : standalone/pdf : . PDF math.pdf ; explicit pdfinstall ; # b2 pdf pdfinstall to do this pdf file copy. -install css_install : math.css : $(here)/html ; +install css_install : math.css : $(here)/html ; ############################################################################### alias boostdoc ; diff --git a/doc/constants/constants.qbk b/doc/constants/constants.qbk index 24092adf56..9cce152da1 100644 --- a/doc/constants/constants.qbk +++ b/doc/constants/constants.qbk @@ -227,6 +227,11 @@ either construct from a decimal digit string or calculate on the fly depending u [[Any other value ['N]][Sets the compile time precision to ['N] bits.]] ] +[h5 GPU Support] + +All Boost.Math constants are marked with `BOOST_MATH_GPU_ENABLED` and can be used on both host and device. +Note that when running on device you are limited to using only `float` and `double` types. 
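As an illustrative sketch (not part of this patch): with a GPU-enabled build, a constant can be used inside a function that is compiled for both host and device, for example:

    #include <boost/math/constants/constants.hpp>

    // Callable from host or device code when Boost.Math GPU support is enabled;
    // on device, only the float and double specializations are usable.
    BOOST_MATH_GPU_ENABLED double circle_area(double r)
    {
       return boost::math::constants::pi<double>() * r * r;
    }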
+ [h5 Custom Specializing a constant] In addition, for user-defined types that need special handling, it's possible to partially-specialize diff --git a/doc/distributions/arcsine.qbk b/doc/distributions/arcsine.qbk index fbd6e86b1e..7930f97d5a 100644 --- a/doc/distributions/arcsine.qbk +++ b/doc/distributions/arcsine.qbk @@ -21,11 +21,11 @@ typedef Policy policy_type; // Constructor from two range parameters, x_min and x_max: - arcsine_distribution(RealType x_min = 0, RealType x_max = 1); + BOOST_MATH_GPU_ENABLED arcsine_distribution(RealType x_min = 0, RealType x_max = 1); // Range Parameter accessors: - RealType x_min() const; - RealType x_max() const; + BOOST_MATH_GPU_ENABLED RealType x_min() const; + BOOST_MATH_GPU_ENABLED RealType x_max() const; }; }} // namespaces @@ -103,8 +103,8 @@ constructs a 'Standard 01' arcsine distribution. [h5 Parameter Accessors] - RealType x_min() const; - RealType x_max() const; + BOOST_MATH_GPU_ENABLED RealType x_min() const; + BOOST_MATH_GPU_ENABLED RealType x_max() const; Return the parameter ['x_min] or ['x_max] from which this distribution was constructed. @@ -116,6 +116,8 @@ So, for example: All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. The formulae for calculating these are shown in the table below, and at [@http://mathworld.wolfram.com/arcsineDistribution.html Wolfram Mathworld]. diff --git a/doc/distributions/bernoulli.qbk b/doc/distributions/bernoulli.qbk index 4a2fc7b618..719c42cd9e 100644 --- a/doc/distributions/bernoulli.qbk +++ b/doc/distributions/bernoulli.qbk @@ -16,9 +16,9 @@ typedef RealType value_type; typedef Policy policy_type; - bernoulli_distribution(RealType p); // Constructor. + BOOST_MATH_GPU_ENABLED bernoulli_distribution(RealType p); // Constructor. // Accessor function. - RealType success_fraction() const + BOOST_MATH_GPU_ENABLED RealType success_fraction() const // Probability of success (as a fraction). }; }} // namespaces @@ -51,12 +51,12 @@ and the [@http://en.wikipedia.org/wiki/Cumulative_Distribution_Function Cumulati [h4 Member Functions] - bernoulli_distribution(RealType p); + BOOST_MATH_GPU_ENABLED bernoulli_distribution(RealType p); Constructs a [@http://en.wikipedia.org/wiki/bernoulli_distribution bernoulli distribution] with success_fraction /p/. - RealType success_fraction() const + BOOST_MATH_GPU_ENABLED RealType success_fraction() const Returns the /success_fraction/ parameter of this distribution. @@ -64,6 +64,8 @@ Returns the /success_fraction/ parameter of this distribution. All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. The domain of the random variable is 0 and 1, and the useful supported range is only 0 or 1. 
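For example (a host-side sketch, not part of this patch), the generic non-member accessors apply to the Bernoulli distribution in the usual way:

    #include <boost/math/distributions/bernoulli.hpp>

    boost::math::bernoulli_distribution<double> dist(0.25); // success fraction p
    double p1 = boost::math::pdf(dist, 1);   // P(X = 1) = 0.25
    double m  = boost::math::mean(dist);     // p = 0.25
    double v  = boost::math::variance(dist); // p * (1 - p) = 0.1875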
diff --git a/doc/distributions/beta.qbk b/doc/distributions/beta.qbk index 95943f715d..5ba1a6d1cc 100644 --- a/doc/distributions/beta.qbk +++ b/doc/distributions/beta.qbk @@ -19,30 +19,30 @@ typedef RealType value_type; typedef Policy policy_type; // Constructor from two shape parameters, alpha & beta: - beta_distribution(RealType a, RealType b); + BOOST_MATH_GPU_ENABLED beta_distribution(RealType a, RealType b); // Parameter accessors: - RealType alpha() const; - RealType beta() const; + BOOST_MATH_GPU_ENABLED RealType alpha() const; + BOOST_MATH_GPU_ENABLED RealType beta() const; // Parameter estimators of alpha or beta from mean and variance. - static RealType find_alpha( + BOOST_MATH_GPU_ENABLED static RealType find_alpha( RealType mean, // Expected value of mean. RealType variance); // Expected value of variance. - static RealType find_beta( + BOOST_MATH_GPU_ENABLED static RealType find_beta( RealType mean, // Expected value of mean. RealType variance); // Expected value of variance. // Parameter estimators from // either alpha or beta, and x and probability. - static RealType find_alpha( + BOOST_MATH_GPU_ENABLED static RealType find_alpha( RealType beta, // from beta. RealType x, // x. RealType probability); // cdf - static RealType find_beta( + BOOST_MATH_GPU_ENABLED static RealType find_beta( RealType alpha, // alpha. RealType x, // probability x. RealType probability); // probability cdf. @@ -98,7 +98,7 @@ whose apex is away from the centre (where x = half). [h5 Constructor] - beta_distribution(RealType alpha, RealType beta); + BOOST_MATH_GPU_ENABLED beta_distribution(RealType alpha, RealType beta); Constructs a beta distribution with shape parameters /alpha/ and /beta/. @@ -117,11 +117,11 @@ in the graph above). [h5 Parameter Accessors] - RealType alpha() const; + BOOST_MATH_GPU_ENABLED RealType alpha() const; Returns the parameter /alpha/ from which this distribution was constructed. - RealType beta() const; + BOOST_MATH_GPU_ENABLED RealType beta() const; Returns the parameter /beta/ from which this distribution was constructed. @@ -182,6 +182,8 @@ Returns the value of [beta] that gives: All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. The formulae for calculating these are shown in the table below, and at [@http://mathworld.wolfram.com/BetaDistribution.html Wolfram Mathworld]. diff --git a/doc/distributions/cauchy.qbk b/doc/distributions/cauchy.qbk index 6ae090818a..e59e3760ed 100644 --- a/doc/distributions/cauchy.qbk +++ b/doc/distributions/cauchy.qbk @@ -15,10 +15,10 @@ typedef RealType value_type; typedef Policy policy_type; - cauchy_distribution(RealType location = 0, RealType scale = 1); + BOOST_MATH_GPU_ENABLED cauchy_distribution(RealType location = 0, RealType scale = 1); - RealType location()const; - RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType location()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; }; The [@http://en.wikipedia.org/wiki/Cauchy_distribution Cauchy-Lorentz distribution] @@ -53,7 +53,7 @@ the distribution: [h4 Member Functions] - cauchy_distribution(RealType location = 0, RealType scale = 1); + BOOST_MATH_GPU_ENABLED cauchy_distribution(RealType location = 0, RealType scale = 1); Constructs a Cauchy distribution, with location parameter /location/ and scale parameter /scale/. 
When these parameters take their default @@ -62,11 +62,11 @@ then the result is a Standard Cauchy Distribution. Requires scale > 0, otherwise calls __domain_error. - RealType location()const; + BOOST_MATH_GPU_ENABLED RealType location()const; Returns the location parameter of the distribution. - RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; Returns the scale parameter of the distribution. @@ -74,6 +74,8 @@ Returns the scale parameter of the distribution. All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. Note however that the Cauchy distribution does not have a mean, standard deviation, etc. See __math_undefined @@ -116,7 +118,7 @@ So recall that for `x < 0`: Substituting into the above we get: -[expression p = -atan(1/x) ; x < 0] +[expression p = -atan(1/x) / [pi] ; x < 0] So the procedure is to calculate the cdf for -fabs(x) using the above formula. Note that to factor in the location and scale diff --git a/doc/distributions/chi_squared.qbk b/doc/distributions/chi_squared.qbk index 753e1f401d..b52d4d392d 100644 --- a/doc/distributions/chi_squared.qbk +++ b/doc/distributions/chi_squared.qbk @@ -18,13 +18,13 @@ typedef Policy policy_type; // Constructor: - chi_squared_distribution(RealType i); + BOOST_MATH_GPU_ENABLED chi_squared_distribution(RealType i); // Accessor to parameter: - RealType degrees_of_freedom()const; + BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom()const; // Parameter estimation: - static RealType find_degrees_of_freedom( + BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom( RealType difference_from_mean, RealType alpha, RealType beta, @@ -104,6 +104,8 @@ See also section on Sample sizes required in All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. 
(We have followed the usual restriction of the mode to degrees of freedom >= 2,
but note that the maximum of the pdf is actually zero for degrees of freedom from 2 down to 0,
diff --git a/doc/distributions/dist_reference.qbk b/doc/distributions/dist_reference.qbk
index c225d1953e..2dd06bcb8b 100644
--- a/doc/distributions/dist_reference.qbk
+++ b/doc/distributions/dist_reference.qbk
@@ -16,15 +16,18 @@
[include fisher.qbk]
[include gamma.qbk]
[include geometric.qbk]
+[include holtsmark.qbk]
[include hyperexponential.qbk]
[include hypergeometric.qbk]
[include inverse_chi_squared.qbk]
[include inverse_gamma.qbk]
[include inverse_gaussian.qbk]
[include kolmogorov_smirnov.qbk]
+[include landau.qbk]
[include laplace.qbk]
[include logistic.qbk]
[include lognormal.qbk]
+[include mapairy.qbk]
[include negative_binomial.qbk]
[include nc_beta.qbk]
[include nc_chi_squared.qbk]
@@ -34,6 +37,7 @@
[include pareto.qbk]
[include poisson.qbk]
[include rayleigh.qbk]
+[include saspoint5.qbk]
[include skew_normal.qbk]
[include students_t.qbk]
[include triangular.qbk]
diff --git a/doc/distributions/dist_tutorial.qbk b/doc/distributions/dist_tutorial.qbk
index ee28dbdca5..80e8e2e458 100644
--- a/doc/distributions/dist_tutorial.qbk
+++ b/doc/distributions/dist_tutorial.qbk
@@ -128,12 +128,49 @@ And quantiles are just the same:
quantile(my_dist, p); // Returns the value of the random variable x
// such that cdf(my_dist, x) == p.
+As is the logcdf (Natural log of the Cumulative Distribution Function):
+
+ logcdf(my_dist, x); // Returns logcdf at point x of distribution my_dist.
+
+And the logpdf (Natural log of the Probability Density Function):
+
+ logpdf(my_dist, x); // Returns logpdf at point x of distribution my_dist.
+
If you're wondering why these aren't member functions, it's to make the
library more easily extensible: if you want to add additional generic
operations - let's say the /n'th moment/ - then all you have to do
is add the appropriate non-member functions, overloaded for each
implemented distribution type.
+The logcdf and logpdf functions are, at a minimum, calculated as log(cdf(my_dist, x))
+and log(pdf(my_dist, x)) respectively. The following distributions have specialized
+implementations of the logcdf:
+
+* Exponential
+* Extreme Value
+* Geometric
+* Laplace
+* Logistic
+* Pareto
+* Rayleigh
+* Weibull
+
+And the following distributions have specialized implementations of logpdf:
+
+* Exponential
+* Extreme Value
+* Gamma
+* Inverse Gamma
+* Inverse Gaussian
+* Laplace
+* Normal
+* Poisson
+* Rayleigh
+* Weibull
+
+The specialized implementations listed above allow a higher degree of precision
+than can be obtained through the naive generic method.
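As an illustrative sketch (not part of this patch), the payoff is easiest to see far out in a tail, where the naive form underflows but the specialized one does not:

    #include <boost/math/distributions/exponential.hpp>
    #include <cmath>

    boost::math::exponential_distribution<double> dist(2.0); // lambda = 2
    // Specialized implementation: log(2) - 2 * 400, about -799.3, computed directly.
    double lp = boost::math::logpdf(dist, 400.0);
    // Naive equivalent: pdf underflows to zero, so the log is -infinity.
    double naive = std::log(boost::math::pdf(dist, 400.0));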
+
[tip
[*Random numbers that approximate Quantiles of Distributions]
diff --git a/doc/distributions/exponential.qbk b/doc/distributions/exponential.qbk
index 043818b4a4..3c90a96483 100644
--- a/doc/distributions/exponential.qbk
+++ b/doc/distributions/exponential.qbk
@@ -15,9 +15,9 @@
typedef RealType value_type;
typedef Policy policy_type;
- exponential_distribution(RealType lambda = 1);
+ BOOST_MATH_GPU_ENABLED exponential_distribution(RealType lambda = 1);
- RealType lambda()const;
+ BOOST_MATH_GPU_ENABLED RealType lambda()const;
};
@@ -37,7 +37,7 @@ values of the rate parameter lambda:
[h4 Member Functions]
- exponential_distribution(RealType lambda = 1);
+ BOOST_MATH_GPU_ENABLED exponential_distribution(RealType lambda = 1);
Constructs an
[@http://en.wikipedia.org/wiki/Exponential_distribution Exponential distribution]
@@ -46,7 +46,7 @@ Lambda is defined as the reciprocal of the scale parameter.
Requires lambda > 0, otherwise calls __domain_error.
- RealType lambda()const;
+ BOOST_MATH_GPU_ENABLED RealType lambda()const;
Accessor function returns the lambda parameter of the distribution.
@@ -54,9 +54,14 @@ Accessor function returns the lambda parameter of the distribution.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \[0, +[infin]\].
+In this distribution the implementations of both `logcdf` and `logpdf` are specialized
+to improve numerical accuracy.
+
[h4 Accuracy]
The exponential distribution is implemented in terms of the
@@ -71,7 +76,9 @@ In the following table [lambda] is the parameter lambda of the distribution,
[table
[[Function][Implementation Notes]]
[[pdf][Using the relation: pdf = [lambda] * exp(-[lambda] * x) ]]
+[[logpdf][log(pdf) = log([lambda]) - [lambda] * x ]]
[[cdf][Using the relation: p = 1 - exp(-x * [lambda]) = -expm1(-x * [lambda]) ]]
+[[logcdf][log(cdf) = log1p(-exp(-x * [lambda])) ]]
[[cdf complement][Using the relation: q = exp(-x * [lambda]) ]]
[[quantile][Using the relation: x = -log(1-p) / [lambda] = -log1p(-p) / [lambda]]]
[[quantile from the complement][Using the relation: x = -log(q) / [lambda]]]
diff --git a/doc/distributions/extreme_value.qbk b/doc/distributions/extreme_value.qbk
index 314917ebc1..f47467d2bd 100644
--- a/doc/distributions/extreme_value.qbk
+++ b/doc/distributions/extreme_value.qbk
@@ -14,10 +14,10 @@
public:
typedef RealType value_type;
- extreme_value_distribution(RealType location = 0, RealType scale = 1);
+ BOOST_MATH_GPU_ENABLED extreme_value_distribution(RealType location = 0, RealType scale = 1);
- RealType scale()const;
- RealType location()const;
+ BOOST_MATH_GPU_ENABLED RealType scale()const;
+ BOOST_MATH_GPU_ENABLED RealType location()const;
};
There are various
@@ -59,18 +59,18 @@ And this graph illustrates how the PDF varies with the shape parameter:
[h4 Member Functions]
- extreme_value_distribution(RealType location = 0, RealType scale = 1);
+ BOOST_MATH_GPU_ENABLED extreme_value_distribution(RealType location = 0, RealType scale = 1);
Constructs an Extreme Value distribution with the specified location and scale
parameters.
Requires `scale > 0`, otherwise calls __domain_error.
- RealType location()const;
+ BOOST_MATH_GPU_ENABLED RealType location()const;
Returns the location parameter of the distribution.
- RealType scale()const;
+ BOOST_MATH_GPU_ENABLED RealType scale()const;
Returns the scale parameter of the distribution.
@@ -78,9 +78,14 @@ Returns the scale parameter of the distribution.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random parameter is \[-[infin], +[infin]\].
+In this distribution the implementations of both `logcdf` and `logpdf` are specialized
+to improve numerical accuracy.
+
[h4 Accuracy]
The extreme value distribution is implemented in terms of the
@@ -96,7 +101,9 @@ In the following table:
[table
[[Function][Implementation Notes]]
[[pdf][Using the relation: pdf = exp((a-x)/b) * exp(-exp((a-x)/b)) / b ]]
+[[logpdf][log(pdf) = log(1/b) + e - exp(e), where e = (a-x)/b ]]
[[cdf][Using the relation: p = exp(-exp((a-x)/b)) ]]
+[[logcdf][log(cdf) = -exp((a-x)/b) ]]
[[cdf complement][Using the relation: q = -expm1(-exp((a-x)/b)) ]]
[[quantile][Using the relation: a - log(-log(p)) * b]]
[[quantile from the complement][Using the relation: a - log(-log1p(-q)) * b]]
diff --git a/doc/distributions/fisher.qbk b/doc/distributions/fisher.qbk
index 80c9a9b29b..9b3a55f59d 100644
--- a/doc/distributions/fisher.qbk
+++ b/doc/distributions/fisher.qbk
@@ -17,11 +17,11 @@
typedef RealType value_type;
// Construct:
- fisher_f_distribution(const RealType& i, const RealType& j);
+ BOOST_MATH_GPU_ENABLED fisher_f_distribution(const RealType& i, const RealType& j);
// Accessors:
- RealType degrees_of_freedom1()const;
- RealType degrees_of_freedom2()const;
+ BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom1()const;
+ BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom2()const;
};
}} //namespaces
@@ -46,7 +46,7 @@ two degrees of freedom parameters.
[h4 Member Functions]
- fisher_f_distribution(const RealType& df1, const RealType& df2);
+ BOOST_MATH_GPU_ENABLED fisher_f_distribution(const RealType& df1, const RealType& df2);
Constructs an F-distribution with numerator degrees of freedom /df1/
and denominator degrees of freedom /df2/.
@@ -54,11 +54,11 @@ and denominator degrees of freedom /df2/.
Requires that /df1/ and /df2/ are both greater than zero, otherwise __domain_error
is called.
- RealType degrees_of_freedom1()const;
+ BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom1()const;
Returns the numerator degrees of freedom parameter of the distribution.
- RealType degrees_of_freedom2()const;
+ BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom2()const;
Returns the denominator degrees of freedom parameter of the distribution.
@@ -66,6 +66,8 @@ Returns the denominator degrees of freedom parameter of the distribution.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \[0, +[infin]\].
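As a host-side sketch (not part of this patch), the accessors combine with the usual machinery in the familiar way, for example to obtain a one-sided F-test p-value:

    #include <boost/math/distributions/fisher_f.hpp>

    boost::math::fisher_f_distribution<double> f(4, 20); // df1 = 4, df2 = 20
    double df1 = f.degrees_of_freedom1();                // 4
    // p-value for an observed F statistic of 2.8:
    double p = boost::math::cdf(boost::math::complement(f, 2.8));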
diff --git a/doc/distributions/gamma.qbk b/doc/distributions/gamma.qbk index eefcc84a0c..dd34ed2fc0 100644 --- a/doc/distributions/gamma.qbk +++ b/doc/distributions/gamma.qbk @@ -12,10 +12,10 @@ typedef RealType value_type; typedef Policy policy_type; - gamma_distribution(RealType shape, RealType scale = 1) + BOOST_MATH_GPU_ENABLED gamma_distribution(RealType shape, RealType scale = 1) - RealType shape()const; - RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType shape()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; }; }} // namespaces @@ -76,7 +76,7 @@ a dedicated Erlang Distribution. [h4 Member Functions] - gamma_distribution(RealType shape, RealType scale = 1); + BOOST_MATH_GPU_ENABLED gamma_distribution(RealType shape, RealType scale = 1); Constructs a gamma distribution with shape /shape/ and scale /scale/. @@ -84,11 +84,11 @@ scale /scale/. Requires that the shape and scale parameters are greater than zero, otherwise calls __domain_error. - RealType shape()const; + BOOST_MATH_GPU_ENABLED RealType shape()const; Returns the /shape/ parameter of this distribution. - RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; Returns the /scale/ parameter of this distribution. @@ -96,9 +96,14 @@ Returns the /scale/ parameter of this distribution. All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. The domain of the random variable is \[0,+[infin]\]. +In this distribution the implementation of `logpdf` is specialized +to improve numerical accuracy. + [h4 Accuracy] The gamma distribution is implemented in terms of the @@ -115,6 +120,7 @@ and /q = 1-p/. [table [[Function][Implementation Notes]] [[pdf][Using the relation: pdf = __gamma_p_derivative(k, x / [theta]) / [theta] ]] +[[logpdf][log(pdf) = -k*log([theta]) + (k-1)*log(x) - lgamma(k) - (x/[theta]) ]] [[cdf][Using the relation: p = __gamma_p(k, x / [theta]) ]] [[cdf complement][Using the relation: q = __gamma_q(k, x / [theta]) ]] [[quantile][Using the relation: x = [theta] * __gamma_p_inv(k, p) ]] diff --git a/doc/distributions/geometric.qbk b/doc/distributions/geometric.qbk index 7aa1a33439..2a4357a2a5 100644 --- a/doc/distributions/geometric.qbk +++ b/doc/distributions/geometric.qbk @@ -17,28 +17,28 @@ typedef RealType value_type; typedef Policy policy_type; // Constructor from success_fraction: - geometric_distribution(RealType p); + BOOST_MATH_GPU_ENABLED geometric_distribution(RealType p); // Parameter accessors: - RealType success_fraction() const; - RealType successes() const; + BOOST_MATH_GPU_ENABLED RealType success_fraction() const; + BOOST_MATH_GPU_ENABLED RealType successes() const; // Bounds on success fraction: - static RealType find_lower_bound_on_p( + BOOST_MATH_GPU_ENABLED static RealType find_lower_bound_on_p( RealType trials, RealType successes, RealType probability); // alpha - static RealType find_upper_bound_on_p( + BOOST_MATH_GPU_ENABLED static RealType find_upper_bound_on_p( RealType trials, RealType successes, RealType probability); // alpha // Estimate min/max number of trials: - static RealType find_minimum_number_of_trials( + BOOST_MATH_GPU_ENABLED static RealType find_minimum_number_of_trials( RealType k, // Number of failures. RealType p, // Success fraction. RealType probability); // Probability threshold alpha. 
- static RealType find_maximum_number_of_trials(
+ BOOST_MATH_GPU_ENABLED static RealType find_maximum_number_of_trials(
RealType k, // Number of failures.
RealType p, // Success fraction.
RealType probability); // Probability threshold alpha.
@@ -268,6 +268,8 @@ of observing more than k failures.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
However it's worth taking a moment to define what these actually mean in
the context of this distribution:
@@ -303,6 +305,9 @@ the context of this distribution:
``quantile(complement(geometric(p), P))``]]
]
+In this distribution the implementation of `logcdf` is specialized
+to improve numerical accuracy.
+
[h4 Accuracy]
This distribution is implemented using the pow and exp functions, so most results
@@ -322,6 +327,7 @@ the expected number of failures using the quantile.
[[Function][Implementation Notes]]
[[pdf][pdf = p * pow(q, k)]]
[[cdf][cdf = 1 - q[super k+1]]]
+[[logcdf][log(cdf) = log1p(-exp(log1p(-p) * (k+1)))]]
[[cdf complement][exp(log1p(-p) * (k+1))]]
[[quantile][k = log1p(-x) / log1p(-p) -1]]
[[quantile from the complement][k = log(x) / log1p(-p) -1]]
diff --git a/doc/distributions/holtsmark.qbk b/doc/distributions/holtsmark.qbk
new file mode 100644
index 0000000000..39c42ff133
--- /dev/null
+++ b/doc/distributions/holtsmark.qbk
@@ -0,0 +1,118 @@
+[section:holtsmark_dist Holtsmark Distribution]
+
+``#include <boost/math/distributions/holtsmark.hpp>``
+
+ template <class RealType = double, class Policy = policies::policy<> >
+ class holtsmark_distribution;
+
+ typedef holtsmark_distribution<> holtsmark;
+
+ template <class RealType = double, class Policy = policies::policy<> >
+ class holtsmark_distribution
+ {
+ public:
+ typedef RealType value_type;
+ typedef Policy policy_type;
+
+ BOOST_MATH_GPU_ENABLED holtsmark_distribution(RealType location = 0, RealType scale = 1);
+
+ BOOST_MATH_GPU_ENABLED RealType location()const;
+ BOOST_MATH_GPU_ENABLED RealType scale()const;
+ };
+
+The [@http://en.wikipedia.org/wiki/holtsmark_distribution Holtsmark distribution]
+is named after Johan Peter Holtsmark.
+It is a special case of a [@http://en.wikipedia.org/wiki/Stable_distribution stable distribution]
+with shape parameter [alpha]=3/2, [beta]=0.
+
+Its [@http://en.wikipedia.org/wiki/Probability_distribution probability distribution function PDF] is
+given by:
+
+[equation holtsmark_ref1] [/f(x; \mu, c)=\frac{1}{2 \pi} \int_{-\infty}^{\infty} \exp( i t \mu - |c t|^{3/2} ) e^{-i x t} dt]
+
+The location parameter [mu] is the location of the distribution,
+while the scale parameter [c] determines the width of the distribution.
+If the location is
+zero, and the scale 1, then the result is a standard holtsmark
+distribution.
+
+The distribution is used especially in astrophysics for modeling gravitational bodies.
+
+The following graph shows how the distribution moves as the
+location parameter changes:
+
+[graph holtsmark_pdf1]
+
+While the following graph shows how the shape (scale) parameter alters
+the distribution:
+
+[graph holtsmark_pdf2]
+
+[h4 Member Functions]
+
+ BOOST_MATH_GPU_ENABLED holtsmark_distribution(RealType location = 0, RealType scale = 1);
+
+Constructs a holtsmark distribution, with location parameter /location/
+and scale parameter /scale/. When these parameters take their default
+values (location = 0, scale = 1)
+then the result is a Standard holtsmark Distribution.
+
+Requires scale > 0, otherwise calls __domain_error.
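A brief usage sketch (illustrative values, not part of this patch), assuming the header path shown in the synopsis above:

    #include <boost/math/distributions/holtsmark.hpp>

    boost::math::holtsmark_distribution<double> h(0.0, 1.0); // standard Holtsmark
    double d = boost::math::pdf(h, 0.5);
    double p = boost::math::cdf(h, 0.5);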
+
+ BOOST_MATH_GPU_ENABLED RealType location()const;
+
+Returns the location parameter of the distribution.
+
+ BOOST_MATH_GPU_ENABLED RealType scale()const;
+
+Returns the scale parameter of the distribution.
+
+[h4 Non-member Accessors]
+
+All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
+that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
+
+Note however that the holtsmark distribution does not have a skewness,
+kurtosis, etc. See __math_undefined
+[/link math_toolkit.pol_ref.assert_undefined mathematically undefined function]
+to control whether these should fail to compile with a BOOST_STATIC_ASSERTION_FAILURE,
+which is the default.
+
+Alternately, the functions __skewness, __kurtosis and __kurtosis_excess will all
+return a __domain_error if called.
+
+The domain of the random variable is \[-[max_value], +[max_value]\].
+
+[h4 Accuracy]
+
+The error is within 4 epsilon.
+
+Errors in the PDF at 64-bit double precision:
+
+[$../graphs/holtsmark_pdf_accuracy_64.png]
+
+Errors in the CDF-complement at 64-bit double precision:
+
+[$../graphs/holtsmark_ccdf_accuracy_64.png]
+
+[h4 Implementation]
+
+See references.
+
+[h4 References]
+
+* [@http://en.wikipedia.org/wiki/holtsmark_distribution Holtsmark Distribution]
+* T. Yoshimura, Numerical Evaluation and High Precision Approximation Formula for Holtsmark Distribution,
+DOI: 10.36227/techrxiv.172054657.73020014/v1, 2024.
+
+[endsect][/section:holtsmark_dist holtsmark]
+
+[/ holtsmark.qbk
+ Copyright Takuma Yoshimura 2024.
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at
+ http://www.boost.org/LICENSE_1_0.txt).
+]
diff --git a/doc/distributions/inverse_chi_squared.qbk b/doc/distributions/inverse_chi_squared.qbk
index 7bc75a8813..8d67082d07 100644
--- a/doc/distributions/inverse_chi_squared.qbk
+++ b/doc/distributions/inverse_chi_squared.qbk
@@ -12,11 +12,11 @@
typedef RealType value_type;
typedef Policy policy_type;
- inverse_chi_squared_distribution(RealType df = 1); // Not explicitly scaled, default 1/df.
- inverse_chi_squared_distribution(RealType df, RealType scale = 1/df); // Scaled.
+ BOOST_MATH_GPU_ENABLED inverse_chi_squared_distribution(RealType df = 1); // Not explicitly scaled, default 1/df.
+ BOOST_MATH_GPU_ENABLED inverse_chi_squared_distribution(RealType df, RealType scale = 1/df); // Scaled.
- RealType degrees_of_freedom()const; // Default 1.
- RealType scale()const; // Optional scale [xi] (variance), default 1/degrees_of_freedom.
+ BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom()const; // Default 1.
+ BOOST_MATH_GPU_ENABLED RealType scale()const; // Optional scale [xi] (variance), default 1/degrees_of_freedom.
};
}} // namespace boost // namespace math
@@ -99,8 +99,8 @@ varies for a few values of parameters [nu] and [xi]:
[h4 Member Functions]
- inverse_chi_squared_distribution(RealType df = 1); // Implicitly scaled 1/df.
- inverse_chi_squared_distribution(RealType df = 1, RealType scale); // Explicitly scaled.
+ BOOST_MATH_GPU_ENABLED inverse_chi_squared_distribution(RealType df = 1); // Implicitly scaled 1/df.
+ BOOST_MATH_GPU_ENABLED inverse_chi_squared_distribution(RealType df, RealType scale = 1/df); // Explicitly scaled.
Constructs an inverse chi_squared distribution with [nu] degrees of freedom ['df],
and scale ['scale] with default value 1\/df.
@@ -108,11 +108,11 @@ and scale ['scale] with default value 1\/df. Requires that the degrees of freedom [nu] parameter is greater than zero, otherwise calls __domain_error. - RealType degrees_of_freedom()const; + BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom()const; Returns the degrees_of_freedom [nu] parameter of this distribution. - RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; Returns the scale [xi] parameter of this distribution. @@ -120,6 +120,8 @@ Returns the scale [xi] parameter of this distribution. All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. The domain of the random variate is \[0,+[infin]\]. [note Unlike some definitions, this implementation supports a random variate diff --git a/doc/distributions/inverse_gamma.qbk b/doc/distributions/inverse_gamma.qbk index 8fccbc19c4..f657ec31b4 100644 --- a/doc/distributions/inverse_gamma.qbk +++ b/doc/distributions/inverse_gamma.qbk @@ -12,10 +12,10 @@ typedef RealType value_type; typedef Policy policy_type; - inverse_gamma_distribution(RealType shape, RealType scale = 1) + BOOST_MATH_GPU_ENABLED inverse_gamma_distribution(RealType shape, RealType scale = 1) - RealType shape()const; - RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType shape()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; }; }} // namespaces @@ -63,18 +63,18 @@ varies as the parameters vary: [h4 Member Functions] - inverse_gamma_distribution(RealType shape = 1, RealType scale = 1); + BOOST_MATH_GPU_ENABLED inverse_gamma_distribution(RealType shape = 1, RealType scale = 1); Constructs an inverse gamma distribution with shape [alpha] and scale [beta]. Requires that the shape and scale parameters are greater than zero, otherwise calls __domain_error. - RealType shape()const; + BOOST_MATH_GPU_ENABLED RealType shape()const; Returns the [alpha] shape parameter of this inverse gamma distribution. - RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; Returns the [beta] scale parameter of this inverse gamma distribution. @@ -82,11 +82,16 @@ Returns the [beta] scale parameter of this inverse gamma distribution. All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. The domain of the random variate is \[0,+[infin]\]. [note Unlike some definitions, this implementation supports a random variate equal to zero as a special case, returning zero for pdf and cdf.] +In this distribution the implementation of `logpdf` is specialized +to improve numerical accuracy. + [h4 Accuracy] The inverse gamma distribution is implemented in terms of the @@ -99,12 +104,13 @@ But in general, inverse_gamma results are accurate to a few epsilon, [h4 Implementation] In the following table [alpha] is the shape parameter of the distribution, -[alpha] is its scale parameter, /x/ is the random variate, /p/ is the probability +[beta] is its scale parameter, /x/ is the random variate, /p/ is the probability and /q = 1-p/. 
[table [[Function][Implementation Notes]] [[pdf][Using the relation: pdf = __gamma_p_derivative([alpha], [beta]/ x, [beta]) / x * x ]] +[[logpdf][log(pdf) = [alpha] * log([beta]) + (-[alpha]-1)*log(x) - lgamma([alpha]) - ([beta]/x) ]] [[cdf][Using the relation: p = __gamma_q([alpha], [beta] / x) ]] [[cdf complement][Using the relation: q = __gamma_p([alpha], [beta] / x) ]] [[quantile][Using the relation: x = [beta]/ __gamma_q_inv([alpha], p) ]] diff --git a/doc/distributions/inverse_gaussian.qbk b/doc/distributions/inverse_gaussian.qbk index c5b824385f..99ca4d7c25 100644 --- a/doc/distributions/inverse_gaussian.qbk +++ b/doc/distributions/inverse_gaussian.qbk @@ -12,11 +12,11 @@ typedef RealType value_type; typedef Policy policy_type; - inverse_gaussian_distribution(RealType mean = 1, RealType scale = 1); + BOOST_MATH_GPU_ENABLED inverse_gaussian_distribution(RealType mean = 1, RealType scale = 1); - RealType mean()const; // mean default 1. - RealType scale()const; // Optional scale, default 1 (unscaled). - RealType shape()const; // Shape = scale/mean. + BOOST_MATH_GPU_ENABLED RealType mean()const; // mean default 1. + BOOST_MATH_GPU_ENABLED RealType scale()const; // Optional scale, default 1 (unscaled). + BOOST_MATH_GPU_ENABLED RealType shape()const; // Shape = scale/mean. }; typedef inverse_gaussian_distribution inverse_gaussian; @@ -90,7 +90,7 @@ Another related parameterisation, the __wald_distrib (where mean [mu] is unity) [h4 Member Functions] - inverse_gaussian_distribution(RealType df = 1, RealType scale = 1); // optionally scaled. + BOOST_MATH_GPU_ENABLED inverse_gaussian_distribution(RealType df = 1, RealType scale = 1); // optionally scaled. Constructs an inverse_gaussian distribution with [mu] mean, and scale [lambda], with both default values 1. @@ -98,11 +98,11 @@ and scale [lambda], with both default values 1. Requires that both the mean [mu] parameter and scale [lambda] are greater than zero, otherwise calls __domain_error. - RealType mean()const; + BOOST_MATH_GPU_ENABLED RealType mean()const; Returns the mean [mu] parameter of this distribution. - RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; Returns the scale [lambda] parameter of this distribution. @@ -110,11 +110,16 @@ Returns the scale [lambda] parameter of this distribution. All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. The domain of the random variate is \[0,+[infin]). [note Unlike some definitions, this implementation supports a random variate equal to zero as a special case, returning zero for both pdf and cdf.] +In this distribution the implementation of `logpdf` is specialized +to improve numerical accuracy. + [h4 Accuracy] The inverse_gaussian distribution is implemented in terms of the @@ -134,6 +139,7 @@ are used for the inverse gaussian function. [table [[Function] [Implementation Notes] ] [[pdf] [ [sqrt]([lambda]/ 2[pi]x[super 3]) e[super -[lambda](x - [mu])[sup2]/ 2[mu][sup2]x]]] +[[logpdf] [log(pdf) = (-[lambda]*pow([mu]-x, 2)/(x*[mu][super 2]) + log([lambda]) - 3*log(x) - log(2*[pi])) / 2 ]] [[cdf][ [Phi]{[sqrt]([lambda]/x) (x/[mu]-1)} + e[super 2[mu]/[lambda]] [Phi]{-[sqrt]([lambda]/[mu]) (1+x/[mu])} ]] [[cdf complement] [using complement of [Phi] above.] ] [[quantile][No closed form known. 
Estimated using a guess refined by Newton-Raphson iteration.]]
diff --git a/doc/distributions/landau.qbk b/doc/distributions/landau.qbk
new file mode 100644
index 0000000000..90dced0aa8
--- /dev/null
+++ b/doc/distributions/landau.qbk
@@ -0,0 +1,131 @@
+[section:landau_dist Landau Distribution]
+
+``#include <boost/math/distributions/landau.hpp>``
+
+ template <class RealType = double, class Policy = policies::policy<> >
+ class landau_distribution;
+
+ typedef landau_distribution<> landau;
+
+ template <class RealType = double, class Policy = policies::policy<> >
+ class landau_distribution
+ {
+ public:
+ typedef RealType value_type;
+ typedef Policy policy_type;
+
+ BOOST_MATH_GPU_ENABLED landau_distribution(RealType location = 0, RealType scale = 1);
+
+ BOOST_MATH_GPU_ENABLED RealType location()const;
+ BOOST_MATH_GPU_ENABLED RealType scale()const;
+ BOOST_MATH_GPU_ENABLED RealType bias()const;
+ };
+
+The [@http://en.wikipedia.org/wiki/landau_distribution Landau distribution]
+is named after Lev Landau.
+It is a special case of a [@http://en.wikipedia.org/wiki/Stable_distribution stable distribution]
+with shape parameter [alpha]=1, [beta]=1.
+
+Its [@http://en.wikipedia.org/wiki/Probability_distribution probability distribution function PDF] is
+given by:
+
+[equation landau_ref1] [/f(x; \mu, c)=\frac{1}{\pi c} \int_{0}^{\infty} \exp(-t) \cos \left( t \left( \frac{x-\mu}{c}\right) + \frac{2t}{\pi} \log \left( \frac{t}{c} \right) \right) dt]
+
+The location parameter [mu] is the location of the distribution,
+while the scale parameter [c] determines the width of the distribution.
+Unlike most other scaled distributions it has the peculiarity that changing
+the scale also shifts the location of the distribution. If the location is
+zero, and the scale 1, then the result is a standard landau
+distribution.
+
+The distribution describes the statistical properties of the energy loss of
+charged particles as they traverse a thin layer of matter.
+
+The following graph shows how the distribution moves as the
+location parameter changes:
+
+[graph landau_pdf1]
+
+While the following graph shows how the shape (scale) parameter alters
+the distribution:
+
+[graph landau_pdf2]
+
+[h4 Member Functions]
+
+ BOOST_MATH_GPU_ENABLED landau_distribution(RealType location = 0, RealType scale = 1);
+
+Constructs a landau distribution, with location parameter /location/
+and scale parameter /scale/. When these parameters take their default
+values (location = 0, scale = 1)
+then the result is a Standard landau Distribution.
+
+Requires scale > 0, otherwise calls __domain_error.
+
+ BOOST_MATH_GPU_ENABLED RealType location()const;
+
+Returns the location parameter of the distribution.
+
+ BOOST_MATH_GPU_ENABLED RealType scale()const;
+
+Returns the scale parameter of the distribution.
+
+ BOOST_MATH_GPU_ENABLED RealType bias()const;
+
+Returns the amount of translation by the scale parameter.
+[expression bias = - 2 / [pi] log(c)]
+
+[h4 Non-member Accessors]
+
+All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
+that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
+
+Note however that the landau distribution does not have a mean,
+standard deviation, etc. See __math_undefined
+[/link math_toolkit.pol_ref.assert_undefined mathematically undefined function]
+to control whether these should fail to compile with a BOOST_STATIC_ASSERTION_FAILURE,
+which is the default.
+
+Alternately, the functions __mean, __sd,
+__variance, __skewness, __kurtosis and __kurtosis_excess will all
+return a __domain_error if called.
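A brief usage sketch (illustrative values, not part of this patch):

    #include <boost/math/distributions/landau.hpp>

    boost::math::landau_distribution<double> l(0.0, 1.0); // standard Landau
    double d = boost::math::pdf(l, 1.5);
    double b = l.bias(); // translation introduced by the scale parameter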
+
+The domain of the random variable is \[-[max_value], +[max_value]\].
+
+[h4 Accuracy]
+
+The error is within 4 epsilon except for the rapidly decaying left tail.
+
+Errors in the PDF at 64-bit double precision:
+
+[$../graphs/landau_pdf_accuracy_64.png]
+
+Errors in the CDF at 64-bit double precision:
+
+[$../graphs/landau_cdf_accuracy_64.png]
+
+Errors in the CDF-complement at 64-bit double precision:
+
+[$../graphs/landau_ccdf_accuracy_64.png]
+
+[h4 Implementation]
+
+See references.
+
+[h4 References]
+
+* [@http://en.wikipedia.org/wiki/landau_distribution landau distribution]
+* T. Yoshimura, Numerical Evaluation and High Precision Approximation Formula for Landau Distribution,
+DOI: 10.36227/techrxiv.171822215.53612870/v2, 2024.
+
+[endsect][/section:landau_dist landau]
+
+[/ landau.qbk
+ Copyright Takuma Yoshimura 2024.
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at
+ http://www.boost.org/LICENSE_1_0.txt).
+]
diff --git a/doc/distributions/laplace.qbk b/doc/distributions/laplace.qbk
index 93327e0228..6115efcb8b 100644
--- a/doc/distributions/laplace.qbk
+++ b/doc/distributions/laplace.qbk
@@ -17,10 +17,10 @@
typedef RealType value_type;
typedef Policy policy_type;
// Construct:
- laplace_distribution(RealType location = 0, RealType scale = 1);
+ BOOST_MATH_GPU_ENABLED laplace_distribution(RealType location = 0, RealType scale = 1);
// Accessors:
- RealType location()const;
- RealType scale()const;
+ BOOST_MATH_GPU_ENABLED RealType location()const;
+ BOOST_MATH_GPU_ENABLED RealType scale()const;
};
}} // namespaces
@@ -49,7 +49,7 @@ Note that the domain of the random variable remains
[h4 Member Functions]
- laplace_distribution(RealType location = 0, RealType scale = 1);
+ BOOST_MATH_GPU_ENABLED laplace_distribution(RealType location = 0, RealType scale = 1);
Constructs a laplace distribution with location /location/ and
scale /scale/.
@@ -61,11 +61,11 @@ The scale parameter is proportional to the standard deviation of the random vari
Requires that the scale parameter is greater than zero, otherwise calls
__domain_error.
- RealType location()const;
+ BOOST_MATH_GPU_ENABLED RealType location()const;
Returns the /location/ parameter of this distribution.
- RealType scale()const;
+ BOOST_MATH_GPU_ENABLED RealType scale()const;
Returns the /scale/ parameter of this distribution.
@@ -73,9 +73,14 @@ Returns the /scale/ parameter of this distribution.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \[-[infin],+[infin]\].
+In this distribution the implementations of both `logcdf` and `logpdf` are specialized
+to improve numerical accuracy.
+
[h4 Accuracy]
The laplace distribution is implemented in terms of the
@@ -90,11 +95,19 @@ and its complement /q = 1-p/.
[table
[[Function][Implementation Notes]]
[[pdf][Using the relation: pdf = e[super -abs(x-[mu]) \/ [sigma]] \/ (2 * [sigma]) ]]
+[[logpdf][log(pdf) = -abs(x-[mu])/[sigma] - log([sigma]) - log(2) ]]
[[cdf][Using the relations:

x < [mu] : p = e[super (x-[mu])/[sigma] ] \/ [sigma]

x >= [mu] : p = 1 - e[super ([mu]-x)/[sigma] ] \/ [sigma]
+]]
+[[logcdf][log(cdf) =
+
+x < [mu] : log(p) = ((x - [mu]) / [sigma]) - log(2)
+
+x >= [mu] : log(p) = log1p(-exp(([mu]-x) / [sigma]) / 2)
+
+]]
[[cdf complement][Using the relation:

diff --git a/doc/distributions/logistic.qbk b/doc/distributions/logistic.qbk
index 0a22b48d42..dc42a5d8b3 100644
--- a/doc/distributions/logistic.qbk
+++ b/doc/distributions/logistic.qbk
@@ -15,10 +15,10 @@
      typedef RealType value_type;
      typedef Policy policy_type;
      // Construct:
-     logistic_distribution(RealType location = 0, RealType scale = 1);
+     BOOST_MATH_GPU_ENABLED logistic_distribution(RealType location = 0, RealType scale = 1);
      // Accessors:
-     RealType location()const; // location.
-     RealType scale()const; // scale.
+     BOOST_MATH_GPU_ENABLED RealType location()const; // location.
+     BOOST_MATH_GPU_ENABLED RealType scale()const; // scale.
   };
@@ -39,17 +39,17 @@ parameters change:
[h4 Member Functions]
-   logistic_distribution(RealType u = 0, RealType s = 1);
+   BOOST_MATH_GPU_ENABLED logistic_distribution(RealType u = 0, RealType s = 1);
Constructs a logistic distribution with location /u/ and scale /s/.
Requires `scale > 0`, otherwise a __domain_error is raised.
-   RealType location()const;
+   BOOST_MATH_GPU_ENABLED RealType location()const;
Returns the location of this distribution.
-   RealType scale()const;
+   BOOST_MATH_GPU_ENABLED RealType scale()const;
Returns the scale of this distribution.
@@ -57,6 +57,8 @@ Returns the scale of this distribution.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \[-\[max_value\], +\[min_value\]\].
However, the pdf and cdf support inputs of +[infin] and -[infin]
@@ -67,6 +69,9 @@ At `p=1` and `p=0`, the quantile function returns the result of
quantile function returns the result of -__overflow_error and
+__overflow_error respectively.
+In this distribution the implementation of `logcdf` is specialized
+to improve numerical accuracy.
+
[h4 Accuracy]
The logistic distribution is implemented in terms of the `std::exp`
@@ -82,6 +87,7 @@ in such cases, only a low /absolute error/ can be guaranteed.
[[Function][Implementation Notes]]
[[pdf][Using the relation: pdf = e[super -(x-u)/s] / (s*(1+e[super -(x-u)/s])[super 2])]]
[[cdf][Using the relation: p = 1/(1+e[super -(x-u)/s])]]
+[[logcdf][log(cdf) = -log1p(exp((u-x)/s)) ]]
[[cdf complement][Using the relation: q = 1/(1+e[super (x-u)/s])]]
[[quantile][Using the relation: x = u - s*log(1/p-1)]]
[[quantile from the complement][Using the relation: x = u + s*log(p/1-p)]]

diff --git a/doc/distributions/lognormal.qbk b/doc/distributions/lognormal.qbk
index 6e76043570..901b59ed82 100644
--- a/doc/distributions/lognormal.qbk
+++ b/doc/distributions/lognormal.qbk
@@ -17,10 +17,10 @@
      typedef RealType value_type;
      typedef Policy policy_type;
      // Construct:
-     lognormal_distribution(RealType location = 0, RealType scale = 1);
+     BOOST_MATH_GPU_ENABLED lognormal_distribution(RealType location = 0, RealType scale = 1);
      // Accessors:
-     RealType location()const;
-     RealType scale()const;
+     BOOST_MATH_GPU_ENABLED RealType location()const;
+     BOOST_MATH_GPU_ENABLED RealType scale()const;
   };
   }} // namespaces
@@ -51,7 +51,7 @@ The next graph illustrates the effect of the scale parameter on the PDF:
[h4 Member Functions]
-   lognormal_distribution(RealType location = 0, RealType scale = 1);
+   BOOST_MATH_GPU_ENABLED lognormal_distribution(RealType location = 0, RealType scale = 1);
Constructs a lognormal distribution with location /location/ and
scale /scale/.
@@ -65,11 +65,11 @@ logarithm of the random variate.
Requires that the scale parameter is greater than zero, otherwise calls
__domain_error.
-   RealType location()const;
+   BOOST_MATH_GPU_ENABLED RealType location()const;
Returns the /location/ parameter of this distribution.
-   RealType scale()const;
+   BOOST_MATH_GPU_ENABLED RealType scale()const;
Returns the /scale/ parameter of this distribution.
@@ -77,6 +77,8 @@ Returns the /scale/ parameter of this distribution.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \[0,+[infin]\].

diff --git a/doc/distributions/mapairy.qbk b/doc/distributions/mapairy.qbk
new file mode 100644
index 0000000000..817fb980da
--- /dev/null
+++ b/doc/distributions/mapairy.qbk
@@ -0,0 +1,121 @@
+[section:mapairy_dist Map-Airy Distribution]
+
+``#include <boost/math/distributions/mapairy.hpp>``
+
+   template <class RealType = double, class Policy = policies::policy<> >
+   class mapairy_distribution;
+
+   typedef mapairy_distribution<> mapairy;
+
+   template <class RealType, class Policy>
+   class mapairy_distribution
+   {
+   public:
+      typedef RealType value_type;
+      typedef Policy policy_type;
+
+      BOOST_MATH_GPU_ENABLED mapairy_distribution(RealType location = 0, RealType scale = 1);
+
+      BOOST_MATH_GPU_ENABLED RealType location()const;
+      BOOST_MATH_GPU_ENABLED RealType scale()const;
+   };
+
+It is a special case of a [@http://en.wikipedia.org/wiki/Stable_distribution stable distribution]
+with shape parameters [alpha]=3/2, [beta]=1.
+
+The distribution is sometimes defined with [beta] = -1 instead, which is the
+mirror image of this one about its location.
+
+Its [@http://en.wikipedia.org/wiki/Probability_distribution probability density function (PDF)]
+is given by:
+
+[equation mapairy_ref1] [/f(x; \mu=0, c=1/\sqrt[3]{18}) = 2 \exp \left( \frac{2}{3} x^3 \right) \left( -x \mathrm{Ai}(x^2) - \mathrm{Ai}'(x^2) \right)]
+
+The location parameter [mu] is the location of the distribution,
+while the scale parameter [c] determines the width of the distribution.
If the location is
+zero, and the scale 1, then the result is a standard Map-Airy
+distribution.
+
+The distribution describes the area under a Brownian excursion over a unit interval.
+
+The following graph shows how the distribution moves as the
+location parameter changes:
+
+[graph mapairy_pdf1]
+
+While the following graph shows how the shape (scale) parameter alters
+the distribution:
+
+[graph mapairy_pdf2]
+
+[h4 Member Functions]
+
+   BOOST_MATH_GPU_ENABLED mapairy_distribution(RealType location = 0, RealType scale = 1);
+
+Constructs a Map-Airy distribution, with location parameter /location/
+and scale parameter /scale/. When these parameters take their default
+values (location = 0, scale = 1)
+then the result is a standard Map-Airy distribution.
+
+Requires scale > 0, otherwise calls __domain_error.
+
+   BOOST_MATH_GPU_ENABLED RealType location()const;
+
+Returns the location parameter of the distribution.
+
+   BOOST_MATH_GPU_ENABLED RealType scale()const;
+
+Returns the scale parameter of the distribution.
+
+[h4 Non-member Accessors]
+
+All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
+that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
+
+Note however that the Map-Airy distribution does not have a skewness,
+kurtosis, etc. See __math_undefined
+[/link math_toolkit.pol_ref.assert_undefined mathematically undefined function]
+to control whether these should fail to compile with a BOOST_STATIC_ASSERTION_FAILURE,
+which is the default.
+
+Alternatively, the functions __skewness, __kurtosis and __kurtosis_excess will all
+return a __domain_error if called.
+
+The domain of the random variable is \[-[max_value], +[max_value]\].
+
+[h4 Accuracy]
+
+The error is within 4 epsilon except for the rapidly decaying left tail.
+
+Errors in the PDF at 64-bit double precision:
+
+[$../graphs/mapairy_pdf_accuracy_64.png]
+
+Errors in the CDF at 64-bit double precision:
+
+[$../graphs/mapairy_cdf_accuracy_64.png]
+
+Errors in the CDF-complement at 64-bit double precision:
+
+[$../graphs/mapairy_ccdf_accuracy_64.png]
+
+[h4 Implementation]
+
+See references.
+
+[h4 References]
+
+* [@https://mathworld.wolfram.com/Map-AiryDistribution.html Wolfram MathWorld: Map-Airy Distribution]
+* T. Yoshimura, Numerical Evaluation and High Precision Approximation Formula for Map-Airy Distribution,
+DOI: 10.36227/techrxiv.172053942.27675733/v1, 2024.
+
+[endsect][/section:mapairy_dist mapairy]
+
+[/ mapairy.qbk
+  Copyright Takuma Yoshimura 2024.
+  Distributed under the Boost Software License, Version 1.0.
+  (See accompanying file LICENSE_1_0.txt or copy at
+  http://www.boost.org/LICENSE_1_0.txt).
+] diff --git a/doc/distributions/nc_beta.qbk b/doc/distributions/nc_beta.qbk index 8e2c816559..478b545020 100644 --- a/doc/distributions/nc_beta.qbk +++ b/doc/distributions/nc_beta.qbk @@ -18,14 +18,14 @@ typedef Policy policy_type; // Constructor: - non_central_beta_distribution(RealType alpha, RealType beta, RealType lambda); + BOOST_MATH_GPU_ENABLED non_central_beta_distribution(RealType alpha, RealType beta, RealType lambda); // Accessor to shape parameters: - RealType alpha()const; - RealType beta()const; + BOOST_MATH_GPU_ENABLED RealType alpha()const; + BOOST_MATH_GPU_ENABLED RealType beta()const; // Accessor to non-centrality parameter lambda: - RealType non_centrality()const; + BOOST_MATH_GPU_ENABLED RealType non_centrality()const; }; }} // namespaces @@ -59,22 +59,22 @@ for different values of [lambda]: [h4 Member Functions] - non_central_beta_distribution(RealType a, RealType b, RealType lambda); + BOOST_MATH_GPU_ENABLED non_central_beta_distribution(RealType a, RealType b, RealType lambda); Constructs a noncentral beta distribution with shape parameters /a/ and /b/ and non-centrality parameter /lambda/. Requires a > 0, b > 0 and lambda >= 0, otherwise calls __domain_error. - RealType alpha()const; + BOOST_MATH_GPU_ENABLED RealType alpha()const; Returns the parameter /a/ from which this object was constructed. - RealType beta()const; + BOOST_MATH_GPU_ENABLED RealType beta()const; Returns the parameter /b/ from which this object was constructed. - RealType non_centrality()const; + BOOST_MATH_GPU_ENABLED RealType non_centrality()const; Returns the parameter /lambda/ from which this object was constructed. @@ -83,6 +83,8 @@ Returns the parameter /lambda/ from which this object was constructed. Most of the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] are supported: __cdf, __pdf, __quantile, __mean, __variance, __sd, __median, __mode, __hazard, __chf, __range and __support. +For this distribution these functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. Mean and variance are implemented using hypergeometric pfq functions and relations given in [@http://reference.wolfram.com/mathematica/ref/NoncentralBetaDistribution.html Wolfram Noncentral Beta Distribution]. 
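+
+For instance, a minimal sketch of the accessors and non-member functions
+described above (the parameter values 2, 3 and 1.5 are arbitrary examples,
+and the header path assumes the usual `boost/math/distributions/` layout):
+
+   #include <boost/math/distributions/non_central_beta.hpp>
+   #include <iostream>
+
+   int main()
+   {
+      boost::math::non_central_beta_distribution<double> dist(2.0, 3.0, 1.5); // a, b, lambda
+      std::cout << dist.alpha() << ", " << dist.beta() << ", "
+                << dist.non_centrality() << "\n";
+      std::cout << cdf(dist, 0.5) << "\n";   // CDF at x = 0.5
+      std::cout << mean(dist) << "\n";       // computed via the hypergeometric pFq relations
+   }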
diff --git a/doc/distributions/nc_chi_squared.qbk b/doc/distributions/nc_chi_squared.qbk
index 72235db6a3..9ab0f6f8d2 100644
--- a/doc/distributions/nc_chi_squared.qbk
+++ b/doc/distributions/nc_chi_squared.qbk
@@ -18,22 +18,22 @@
      typedef Policy policy_type;
      // Constructor:
-     non_central_chi_squared_distribution(RealType v, RealType lambda);
+     BOOST_MATH_GPU_ENABLED non_central_chi_squared_distribution(RealType v, RealType lambda);
      // Accessor to degrees of freedom parameter v:
-     RealType degrees_of_freedom()const;
+     BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom()const;
      // Accessor to non centrality parameter lambda:
-     RealType non_centrality()const;
+     BOOST_MATH_GPU_ENABLED RealType non_centrality()const;
      // Parameter finders:
-     static RealType find_degrees_of_freedom(RealType lambda, RealType x, RealType p);
+     BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(RealType lambda, RealType x, RealType p);
      template <class A, class B, class C>
-     static RealType find_degrees_of_freedom(const complemented3_type<A,B,C>& c);
+     BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(const complemented3_type<A,B,C>& c);
-     static RealType find_non_centrality(RealType v, RealType x, RealType p);
+     BOOST_MATH_GPU_ENABLED static RealType find_non_centrality(RealType v, RealType x, RealType p);
      template <class A, class B, class C>
-     static RealType find_non_centrality(const complemented3_type<A,B,C>& c);
+     BOOST_MATH_GPU_ENABLED static RealType find_non_centrality(const complemented3_type<A,B,C>& c);
   };
   }} // namespaces
@@ -70,43 +70,42 @@ for different values of [lambda]:
[h4 Member Functions]
-      non_central_chi_squared_distribution(RealType v, RealType lambda);
+      BOOST_MATH_GPU_ENABLED non_central_chi_squared_distribution(RealType v, RealType lambda);
Constructs a Chi-Squared distribution with [nu] degrees of freedom
and non-centrality parameter /lambda/.
Requires [nu] > 0 and lambda >= 0, otherwise calls __domain_error.
-      RealType degrees_of_freedom()const;
+      BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom()const;
Returns the parameter [nu] from which this object was constructed.
-      RealType non_centrality()const;
+      BOOST_MATH_GPU_ENABLED RealType non_centrality()const;
Returns the parameter /lambda/ from which this object was constructed.
-
-      static RealType find_degrees_of_freedom(RealType lambda, RealType x, RealType p);
+      BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(RealType lambda, RealType x, RealType p);
This function returns the number of degrees of freedom [nu] such that:
`cdf(non_central_chi_squared(v, lambda), x) == p`
-      template <class A, class B, class C>
-      static RealType find_degrees_of_freedom(const complemented3_type<A,B,C>& c);
+      template <class A, class B, class C>
+      BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(const complemented3_type<A,B,C>& c);
When called with argument `boost::math::complement(lambda, x, q)`
this function returns the number of degrees of freedom [nu] such that:
`cdf(complement(non_central_chi_squared(v, lambda), x)) == q`.
-      static RealType find_non_centrality(RealType v, RealType x, RealType p);
+      BOOST_MATH_GPU_ENABLED static RealType find_non_centrality(RealType v, RealType x, RealType p);
This function returns the non centrality parameter /lambda/ such that:
`cdf(non_central_chi_squared(v, lambda), x) == p`
-      template <class A, class B, class C>
-      static RealType find_non_centrality(const complemented3_type<A,B,C>& c);
+      template <class A, class B, class C>
+      BOOST_MATH_GPU_ENABLED static RealType find_non_centrality(const complemented3_type<A,B,C>& c);
When called with argument `boost::math::complement(v, x, q)`
this function returns the non centrality parameter /lambda/ such that:
@@ -117,6 +116,8 @@ this function returns the non centrality parameter /lambda/ such that:
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \[0, +[infin]\].

diff --git a/doc/distributions/nc_f.qbk b/doc/distributions/nc_f.qbk
index 6436c34336..d31c8116bc 100644
--- a/doc/distributions/nc_f.qbk
+++ b/doc/distributions/nc_f.qbk
@@ -18,14 +18,14 @@
      typedef Policy policy_type;
      // Constructor:
-     non_central_f_distribution(RealType v1, RealType v2, RealType lambda);
+     BOOST_MATH_GPU_ENABLED non_central_f_distribution(RealType v1, RealType v2, RealType lambda);
      // Accessor to degrees_of_freedom parameters v1 & v2:
-     RealType degrees_of_freedom1()const;
-     RealType degrees_of_freedom2()const;
+     BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom1()const;
+     BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom2()const;
      // Accessor to non-centrality parameter lambda:
-     RealType non_centrality()const;
+     BOOST_MATH_GPU_ENABLED RealType non_centrality()const;
   };
   }} // namespaces
@@ -55,22 +55,22 @@ for different values of [lambda]:
[h4 Member Functions]
-      non_central_f_distribution(RealType v1, RealType v2, RealType lambda);
+      BOOST_MATH_GPU_ENABLED non_central_f_distribution(RealType v1, RealType v2, RealType lambda);
Constructs a non-central F distribution with parameters /v1/ and /v2/
and non-centrality parameter /lambda/.
Requires /v1/ > 0, /v2/ > 0 and lambda >= 0, otherwise calls __domain_error.
-      RealType degrees_of_freedom1()const;
+      BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom1()const;
Returns the parameter /v1/ from which this object was constructed.
-      RealType degrees_of_freedom2()const;
+      BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom2()const;
Returns the parameter /v2/ from which this object was constructed.
-      RealType non_centrality()const;
+      BOOST_MATH_GPU_ENABLED RealType non_centrality()const;
Returns the non-centrality parameter /lambda/ from which this object was constructed.
@@ -78,6 +78,8 @@ Returns the non-centrality parameter /lambda/ from which this object was constru
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \[0, +[infin]\].
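+
+To make the non-central chi-squared parameter finders documented above
+concrete, here is a minimal sketch (the values 4, 10 and 0.05 are arbitrary
+examples):
+
+   #include <boost/math/distributions/non_central_chi_squared.hpp>
+   #include <iostream>
+
+   int main()
+   {
+      using boost::math::non_central_chi_squared_distribution;
+      // Find lambda such that cdf(non_central_chi_squared(4, lambda), 10) == 0.05:
+      double lambda = non_central_chi_squared_distribution<double>::find_non_centrality(4.0, 10.0, 0.05);
+      std::cout << lambda << "\n";
+      // The complemented form: cdf(complement(non_central_chi_squared(4, lambda2), 10)) == 0.95:
+      double lambda2 = non_central_chi_squared_distribution<double>::find_non_centrality(
+         boost::math::complement(4.0, 10.0, 0.95));
+      std::cout << lambda2 << "\n";
+   }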
diff --git a/doc/distributions/negative_binomial.qbk b/doc/distributions/negative_binomial.qbk index ee61beef3d..5a23ce23aa 100644 --- a/doc/distributions/negative_binomial.qbk +++ b/doc/distributions/negative_binomial.qbk @@ -17,28 +17,28 @@ typedef RealType value_type; typedef Policy policy_type; // Constructor from successes and success_fraction: - negative_binomial_distribution(RealType r, RealType p); + BOOST_MATH_GPU_ENABLED negative_binomial_distribution(RealType r, RealType p); // Parameter accessors: - RealType success_fraction() const; - RealType successes() const; + BOOST_MATH_GPU_ENABLED RealType success_fraction() const; + BOOST_MATH_GPU_ENABLED RealType successes() const; // Bounds on success fraction: - static RealType find_lower_bound_on_p( + BOOST_MATH_GPU_ENABLED static RealType find_lower_bound_on_p( RealType trials, RealType successes, RealType probability); // alpha - static RealType find_upper_bound_on_p( + BOOST_MATH_GPU_ENABLED static RealType find_upper_bound_on_p( RealType trials, RealType successes, RealType probability); // alpha // Estimate min/max number of trials: - static RealType find_minimum_number_of_trials( + BOOST_MATH_GPU_ENABLED static RealType find_minimum_number_of_trials( RealType k, // Number of failures. RealType p, // Success fraction. RealType probability); // Probability threshold alpha. - static RealType find_maximum_number_of_trials( + BOOST_MATH_GPU_ENABLED static RealType find_maximum_number_of_trials( RealType k, // Number of failures. RealType p, // Success fraction. RealType probability); // Probability threshold alpha. @@ -112,7 +112,7 @@ poisson([lambda]) = lim [sub r [rarr] [infin]] negative_binomial(r, r / ([lambda [h5 Construct] - negative_binomial_distribution(RealType r, RealType p); + BOOST_MATH_GPU_ENABLED negative_binomial_distribution(RealType r, RealType p); Constructor: /r/ is the total number of successes, /p/ is the probability of success of a single trial. @@ -121,11 +121,11 @@ Requires: `r > 0` and `0 <= p <= 1`. [h5 Accessors] - RealType success_fraction() const; // successes / trials (0 <= p <= 1) + BOOST_MATH_GPU_ENABLED RealType success_fraction() const; // successes / trials (0 <= p <= 1) Returns the parameter /p/ from which this distribution was constructed. - RealType successes() const; // required successes (r > 0) + BOOST_MATH_GPU_ENABLED RealType successes() const; // required successes (r > 0) Returns the parameter /r/ from which this distribution was constructed. @@ -134,7 +134,7 @@ see __binomial_distrib for more discussion. [h5 Lower Bound on Parameter p] - static RealType find_lower_bound_on_p( + BOOST_MATH_GPU_ENABLED static RealType find_lower_bound_on_p( RealType failures, RealType successes, RealType probability) // (0 <= alpha <= 1), 0.05 equivalent to 95% confidence. @@ -170,7 +170,7 @@ Computational statistics and data analysis, 2005, vol. 48, no3, 605-621]. [h5 Upper Bound on Parameter p] - static RealType find_upper_bound_on_p( + BOOST_MATH_GPU_ENABLED static RealType find_upper_bound_on_p( RealType trials, RealType successes, RealType alpha); // (0 <= alpha <= 1), 0.05 equivalent to 95% confidence. @@ -206,7 +206,7 @@ Computational statistics and data analysis, 2005, vol. 48, no3, 605-621]. [h5 Estimating Number of Trials to Ensure at Least a Certain Number of Failures] - static RealType find_minimum_number_of_trials( + BOOST_MATH_GPU_ENABLED static RealType find_minimum_number_of_trials( RealType k, // number of failures. RealType p, // success fraction. 
RealType alpha); // probability threshold (0.05 equivalent to 95%).
@@ -236,7 +236,7 @@ of observing k failures or fewer.
[h5 Estimating Number of Trials to Ensure a Maximum Number of Failures or Less]
-   static RealType find_maximum_number_of_trials(
+   BOOST_MATH_GPU_ENABLED static RealType find_maximum_number_of_trials(
      RealType k,     // number of failures.
      RealType p,     // success fraction.
      RealType alpha); // probability threshold (0.05 equivalent to 95%).
@@ -266,6 +266,8 @@ of observing more than k failures.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
However it's worth taking a moment to define what these actually mean in
the context of this distribution:

diff --git a/doc/distributions/non_members.qbk b/doc/distributions/non_members.qbk
index 9c900bf127..99728b8700 100644
--- a/doc/distributions/non_members.qbk
+++ b/doc/distributions/non_members.qbk
@@ -17,6 +17,8 @@ to go straight to the function you want if you already know its name.
* __hazard.
* __kurtosis.
* __kurtosis_excess
+* __logcdf.
+* __logpdf.
* __mean.
* __median.
* __mode.
@@ -349,6 +351,20 @@ Kurtosis excess can have a value from -2 to + infinity.
The kurtosis excess of a normal distribution is zero.
+[h4:logcdf Natural Log of the Cumulative Distribution Function]
+
+   template <class RealType, class Policy>
+   RealType logcdf(const ``['Distribution-Type]``<RealType, Policy>& dist, const RealType& x);
+
+Returns the natural log of the CDF of distribution /dist/ evaluated at /x/.
+
+[h4:logpdf Natural Log of the Probability Density Function]
+
+   template <class RealType, class Policy>
+   RealType logpdf(const ``['Distribution-Type]``<RealType, Policy>& dist, const RealType& x);
+
+Returns the natural log of the PDF of distribution /dist/ evaluated at /x/.
+
[h4:cdfPQ P and Q]
The terms P and Q are sometimes used to refer to the __cdf

diff --git a/doc/distributions/normal.qbk b/doc/distributions/normal.qbk
index 52ac44e96b..8e0f0c8fba 100644
--- a/doc/distributions/normal.qbk
+++ b/doc/distributions/normal.qbk
@@ -17,13 +17,13 @@
      typedef RealType value_type;
      typedef Policy policy_type;
      // Construct:
-     normal_distribution(RealType mean = 0, RealType sd = 1);
+     BOOST_MATH_GPU_ENABLED normal_distribution(RealType mean = 0, RealType sd = 1);
      // Accessors:
-     RealType mean()const; // location.
-     RealType standard_deviation()const; // scale.
+     BOOST_MATH_GPU_ENABLED RealType mean()const; // location.
+     BOOST_MATH_GPU_ENABLED RealType standard_deviation()const; // scale.
      // Synonyms, provided to allow generic use of find_location and find_scale.
-     RealType location()const;
-     RealType scale()const;
+     BOOST_MATH_GPU_ENABLED RealType location()const;
+     BOOST_MATH_GPU_ENABLED RealType scale()const;
   };
   }} // namespaces
@@ -53,20 +53,20 @@ and illustrated by this graph
[h4 Member Functions]
-   normal_distribution(RealType mean = 0, RealType sd = 1);
+   BOOST_MATH_GPU_ENABLED normal_distribution(RealType mean = 0, RealType sd = 1);
Constructs a normal distribution with mean /mean/ and
standard deviation /sd/.
Requires /sd/ > 0, otherwise __domain_error is called.
-   RealType mean()const;
-   RealType location()const;
+   BOOST_MATH_GPU_ENABLED RealType mean()const;
+   BOOST_MATH_GPU_ENABLED RealType location()const;
both return the /mean/ of this distribution.
- RealType standard_deviation()const; - RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType standard_deviation()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; both return the /standard deviation/ of this distribution. (Redundant location and scale function are provided to match other similar distributions, @@ -76,6 +76,8 @@ allowing the functions find_location and find_scale to be used generically). All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. The domain of the random variable is \[-[max_value], +[min_value]\]. However, the pdf of +[infin] and -[infin] = 0 is also supported, @@ -97,6 +99,7 @@ and /s/ is its standard deviation. [table [[Function][Implementation Notes]] [[pdf][Using the relation: pdf = e[super -(x-m)[super 2]\/(2s[super 2])] \/ (s * sqrt(2*pi)) ]] +[[logpdf][log(pdf) = -log(s) - log(2*[pi])/2 - (x-mean)[super 2]/(2*s[super 2]) ]] [[cdf][Using the relation: p = 0.5 * __erfc(-(x-m)/(s*sqrt(2))) ]] [[cdf complement][Using the relation: q = 0.5 * __erfc((x-m)/(s*sqrt(2))) ]] [[quantile][Using the relation: x = m - s * sqrt(2) * __erfc_inv(2*p)]] diff --git a/doc/distributions/pareto.qbk b/doc/distributions/pareto.qbk index fcc7eee425..0161282ec9 100644 --- a/doc/distributions/pareto.qbk +++ b/doc/distributions/pareto.qbk @@ -17,10 +17,10 @@ public: typedef RealType value_type; // Constructor: - pareto_distribution(RealType scale = 1, RealType shape = 1) + BOOST_MATH_GPU_ENABLED pareto_distribution(RealType scale = 1, RealType shape = 1) // Accessors: - RealType scale()const; - RealType shape()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType shape()const; }; }} // namespaces @@ -50,7 +50,7 @@ And this graph illustrates how the PDF varies with the shape parameter [alpha]: [h4 Member Functions] - pareto_distribution(RealType scale = 1, RealType shape = 1); + BOOST_MATH_GPU_ENABLED pareto_distribution(RealType scale = 1, RealType shape = 1); Constructs a [@http://en.wikipedia.org/wiki/pareto_distribution pareto distribution] with shape /shape/ and scale /scale/. @@ -58,11 +58,11 @@ pareto distribution] with shape /shape/ and scale /scale/. Requires that the /shape/ and /scale/ parameters are both greater than zero, otherwise calls __domain_error. - RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; Returns the /scale/ parameter of this distribution. - RealType shape()const; + BOOST_MATH_GPU_ENABLED RealType shape()const; Returns the /shape/ parameter of this distribution. @@ -70,9 +70,14 @@ Returns the /shape/ parameter of this distribution. All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. The supported domain of the random variable is \[scale, [infin]\]. +In this distribution the implementation of `logcdf` is specialized +to improve numerical accuracy. + [h4 Accuracy] The Pareto distribution is implemented in terms of the @@ -91,6 +96,7 @@ and its complement /q = 1-p/. 
[[Function][Implementation Notes]] [[pdf][Using the relation: pdf p = [alpha][beta][super [alpha]]/x[super [alpha] +1] ]] [[cdf][Using the relation: cdf p = 1 - ([beta] / x)[super [alpha]] ]] +[[logcdf][log(cdf) = log1p(-pow([beta]/x, [alpha])) ]] [[cdf complement][Using the relation: q = 1 - p = -([beta] / x)[super [alpha]] ]] [[quantile][Using the relation: x = [beta] / (1 - p)[super 1/[alpha]] ]] [[quantile from the complement][Using the relation: x = [beta] / (q)[super 1/[alpha]] ]] diff --git a/doc/distributions/poisson.qbk b/doc/distributions/poisson.qbk index 50a3b12cea..533a6d6bad 100644 --- a/doc/distributions/poisson.qbk +++ b/doc/distributions/poisson.qbk @@ -17,8 +17,8 @@ typedef RealType value_type; typedef Policy policy_type; - poisson_distribution(RealType mean = 1); // Constructor. - RealType mean()const; // Accessor. + BOOST_MATH_GPU_ENABLED poisson_distribution(RealType mean = 1); // Constructor. + BOOST_MATH_GPU_ENABLED RealType mean()const; // Accessor. } }} // namespaces boost::math @@ -47,11 +47,11 @@ The following graph illustrates how the PDF varies with the parameter [lambda]: [h4 Member Functions] - poisson_distribution(RealType mean = 1); + BOOST_MATH_GPU_ENABLED poisson_distribution(RealType mean = 1); Constructs a poisson distribution with mean /mean/. - RealType mean()const; + BOOST_MATH_GPU_ENABLED RealType mean()const; Returns the /mean/ of this distribution. @@ -59,9 +59,14 @@ Returns the /mean/ of this distribution. All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. The domain of the random variable is \[0, [infin]\]. +In this distribution the implementation of `logpdf` is specialized +to improve numerical accuracy. + [h4 Accuracy] The Poisson distribution is implemented in terms of the @@ -81,6 +86,7 @@ In the following table [lambda] is the mean of the distribution, [table [[Function][Implementation Notes]] [[pdf][Using the relation: pdf = e[super -[lambda]] [lambda][super k] \/ k! ]] +[[logpdf][log(pdf) = -lgamma(k+1) + k*log([lambda]) - [lambda] if k > 0 and [lambda] > 0 ]] [[cdf][Using the relation: p = [Gamma](k+1, [lambda]) \/ k! = __gamma_q(k+1, [lambda])]] [[cdf complement][Using the relation: q = __gamma_p(k+1, [lambda]) ]] [[quantile][Using the relation: k = __gamma_q_inva([lambda], p) - 1]] diff --git a/doc/distributions/rayleigh.qbk b/doc/distributions/rayleigh.qbk index 5fd6fe44c2..a28d5f577b 100644 --- a/doc/distributions/rayleigh.qbk +++ b/doc/distributions/rayleigh.qbk @@ -18,9 +18,9 @@ typedef RealType value_type; typedef Policy policy_type; // Construct: - rayleigh_distribution(RealType sigma = 1) + BOOST_MATH_GPU_ENABLED rayleigh_distribution(RealType sigma = 1) // Accessors: - RealType sigma()const; + BOOST_MATH_GPU_ENABLED RealType sigma()const; }; }} // namespaces @@ -58,7 +58,7 @@ and [@http://en.wikipedia.org/wiki/Weibull_distribution Weibull] distributions a [h4 Member Functions] - rayleigh_distribution(RealType sigma = 1); + BOOST_MATH_GPU_ENABLED rayleigh_distribution(RealType sigma = 1); Constructs a [@http://en.wikipedia.org/wiki/Rayleigh_distribution Rayleigh distribution] with [sigma] /sigma/. @@ -66,7 +66,7 @@ Rayleigh distribution] with [sigma] /sigma/. Requires that the [sigma] parameter is greater than zero, otherwise calls __domain_error. 
-   RealType sigma()const;
+   BOOST_MATH_GPU_ENABLED RealType sigma()const;
Returns the /sigma/ parameter of this distribution.
@@ -74,9 +74,14 @@ Returns the /sigma/ parameter of this distribution.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \[0, max_value\].
+In this distribution the implementations of both `logcdf` and `logpdf` are specialized
+to improve numerical accuracy.
+
[h4 Accuracy]
The Rayleigh distribution is implemented in terms of the
@@ -92,7 +97,9 @@ In the following table [sigma] is the sigma parameter of the distribution,
[table
[[Function][Implementation Notes]]
[[pdf][Using the relation: pdf = x * exp(-x[super 2])/2 [sigma][super 2] ]]
+[[logpdf][log(pdf) = -(x[super 2])/(2*[sigma][super 2]) - 2*log([sigma]) + log(x) ]]
[[cdf][Using the relation: p = 1 - exp(-x[super 2]/2) [sigma][super 2]= -__expm1(-x[super 2]/2) [sigma][super 2]]]
+[[logcdf][log(cdf) = log1p(-exp(-x[super 2] / (2*[sigma][super 2]))) ]]
[[cdf complement][Using the relation: q = exp(-x[super 2]/ 2) * [sigma][super 2] ]]
[[quantile][Using the relation: x = sqrt(-2 * [sigma] [super 2]) * log(1 - p)) = sqrt(-2 * [sigma] [super 2]) * __log1p(-p))]]
[[quantile from the complement][Using the relation: x = sqrt(-2 * [sigma] [super 2]) * log(q)) ]]

diff --git a/doc/distributions/saspoint5.qbk b/doc/distributions/saspoint5.qbk
new file mode 100644
index 0000000000..06efbd32e5
--- /dev/null
+++ b/doc/distributions/saspoint5.qbk
@@ -0,0 +1,116 @@
+[section:saspoint5_dist S[alpha]S Point5 Distribution]
+
+``#include <boost/math/distributions/saspoint5.hpp>``
+
+   template <class RealType = double, class Policy = policies::policy<> >
+   class saspoint5_distribution;
+
+   typedef saspoint5_distribution<> saspoint5;
+
+   template <class RealType, class Policy>
+   class saspoint5_distribution
+   {
+   public:
+      typedef RealType value_type;
+      typedef Policy policy_type;
+
+      BOOST_MATH_GPU_ENABLED saspoint5_distribution(RealType location = 0, RealType scale = 1);
+
+      BOOST_MATH_GPU_ENABLED RealType location()const;
+      BOOST_MATH_GPU_ENABLED RealType scale()const;
+   };
+
+It is a special case of a [@http://en.wikipedia.org/wiki/Stable_distribution stable distribution]
+with shape parameters [alpha]=1/2, [beta]=0.
+
+Its [@http://en.wikipedia.org/wiki/Probability_distribution probability density function (PDF)]
+is given by:
+
+[equation saspoint5_ref1] [/f(x; \mu, c)=\frac{1}{2 \pi} \int_{-\infty}^{\infty} \exp( i t \mu - \sqrt{|c t|} ) e^{-i x t} dt]
+
+The location parameter [mu] is the location of the distribution,
+while the scale parameter [c] determines the width of the distribution.
+If the location is
+zero, and the scale 1, then the result is a standard S[alpha]S Point5
+distribution.
+
+This distribution has heavier tails than the Cauchy distribution.
+
+The following graph shows how the distribution moves as the
+location parameter changes:
+
+[graph saspoint5_pdf1]
+
+While the following graph shows how the shape (scale) parameter alters
+the distribution:
+
+[graph saspoint5_pdf2]
+
+[h4 Member Functions]
+
+   BOOST_MATH_GPU_ENABLED saspoint5_distribution(RealType location = 0, RealType scale = 1);
+
+Constructs an S[alpha]S Point5 distribution, with location parameter /location/
+and scale parameter /scale/. When these parameters take their default
+values (location = 0, scale = 1)
+then the result is a standard S[alpha]S Point5 distribution.
+
+Requires scale > 0, otherwise calls __domain_error.
+
+   BOOST_MATH_GPU_ENABLED RealType location()const;
+
+Returns the location parameter of the distribution.
+
+   BOOST_MATH_GPU_ENABLED RealType scale()const;
+
+Returns the scale parameter of the distribution.
+
+[h4 Non-member Accessors]
+
+All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
+that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
+
+Note however that the S[alpha]S Point5 distribution does not have a mean,
+standard deviation, etc. See __math_undefined
+[/link math_toolkit.pol_ref.assert_undefined mathematically undefined function]
+to control whether these should fail to compile with a BOOST_STATIC_ASSERTION_FAILURE,
+which is the default.
+
+Alternatively, the functions __mean, __sd,
+__variance, __skewness, __kurtosis and __kurtosis_excess will all
+return a __domain_error if called.
+
+The domain of the random variable is \[-[max_value], +[max_value]\].
+
+[h4 Accuracy]
+
+The error is within 4 epsilon.
+
+Errors in the PDF at 64-bit double precision:
+
+[$../graphs/saspoint5_pdf_accuracy_64.png]
+
+Errors in the CDF-complement at 64-bit double precision:
+
+[$../graphs/saspoint5_ccdf_accuracy_64.png]
+
+[h4 Implementation]
+
+See references.
+
+[h4 References]
+
+* T. Yoshimura, Numerical Evaluation and High Precision Approximation Formula for S[alpha]S Point5 Distribution,
+DOI: 10.36227/techrxiv.172055253.37208198/v1, 2024.
+
+[endsect][/section:saspoint5_dist saspoint5]
+
+[/ saspoint5.qbk
+  Copyright Takuma Yoshimura 2024.
+  Distributed under the Boost Software License, Version 1.0.
+  (See accompanying file LICENSE_1_0.txt or copy at
+  http://www.boost.org/LICENSE_1_0.txt).
+]

diff --git a/doc/distributions/students_t.qbk b/doc/distributions/students_t.qbk
index 9701ce9fca..3396048c5f 100644
--- a/doc/distributions/students_t.qbk
+++ b/doc/distributions/students_t.qbk
@@ -17,13 +17,13 @@
      typedef Policy policy_type;
      // Constructor:
-     students_t_distribution(const RealType& v);
+     BOOST_MATH_GPU_ENABLED students_t_distribution(const RealType& v);
      // Accessor:
-     RealType degrees_of_freedom()const;
+     BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom()const;
      // degrees of freedom estimation:
-     static RealType find_degrees_of_freedom(
+     BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(
        RealType difference_from_mean,
        RealType alpha,
        RealType beta,
@@ -62,7 +62,7 @@ illustrates how the PDF varies with the degrees of freedom [nu]:
[h4 Member Functions]
-   students_t_distribution(const RealType& v);
+   BOOST_MATH_GPU_ENABLED students_t_distribution(const RealType& v);
Constructs a Student's t-distribution with /v/ degrees of freedom.
@@ -71,11 +71,11 @@ otherwise calls __domain_error.
Note that non-integral degrees of freedom are supported,
and are meaningful under certain circumstances.
-   RealType degrees_of_freedom()const;
+   BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom()const;
returns the number of degrees of freedom of this distribution.
-   static RealType find_degrees_of_freedom(
+   BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(
      RealType difference_from_mean,
      RealType alpha,
     RealType beta,
@@ -110,6 +110,8 @@ NIST Engineering Statistics Handbook].
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \[-[infin], +[infin]\].

diff --git a/doc/distributions/triangular.qbk b/doc/distributions/triangular.qbk
index cd48f3310a..f32514f652 100644
--- a/doc/distributions/triangular.qbk
+++ b/doc/distributions/triangular.qbk
@@ -17,12 +17,12 @@
      typedef RealType value_type;
      typedef Policy policy_type;
-     triangular_distribution(RealType lower = -1, RealType mode = 0, RealType upper = 1); // Constructor.
+     BOOST_MATH_GPU_ENABLED triangular_distribution(RealType lower = -1, RealType mode = 0, RealType upper = 1); // Constructor.
        : m_lower(lower), m_mode(mode), m_upper(upper) // Default is -1, 0, +1 symmetric triangular distribution.
      // Accessor functions.
-     RealType lower()const;
-     RealType mode()const;
-     RealType upper()const;
+     BOOST_MATH_GPU_ENABLED RealType lower()const;
+     BOOST_MATH_GPU_ENABLED RealType mode()const;
+     BOOST_MATH_GPU_ENABLED RealType upper()const;
   }; // class triangular_distribution
   }} // namespaces
@@ -77,7 +77,7 @@ and cumulative distribution function
[h4 Member Functions]
-   triangular_distribution(RealType lower = 0, RealType mode = 0 RealType upper = 1);
+   BOOST_MATH_GPU_ENABLED triangular_distribution(RealType lower = -1, RealType mode = 0, RealType upper = 1);
Constructs a [@http://en.wikipedia.org/wiki/triangular_distribution triangular distribution]
with lower /lower/ (a) and upper /upper/ (b).
@@ -99,15 +99,15 @@ So, for example, to compute a variance using __WolframAlpha, use
The parameters of a distribution can be obtained using these member functions:
-   RealType lower()const;
+   BOOST_MATH_GPU_ENABLED RealType lower()const;
Returns the ['lower] parameter of this distribution (default -1).
-   RealType mode()const;
+   BOOST_MATH_GPU_ENABLED RealType mode()const;
Returns the ['mode] parameter of this distribution (default 0).
-   RealType upper()const;
+   BOOST_MATH_GPU_ENABLED RealType upper()const;
Returns the ['upper] parameter of this distribution (default +1).
@@ -115,6 +115,8 @@ Returns the ['upper] parameter of this distribution (default+1).
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \lower\ to \upper\,
and the supported range is lower <= x <= upper.

diff --git a/doc/distributions/uniform.qbk b/doc/distributions/uniform.qbk
index 58c11735d6..56c965877e 100644
--- a/doc/distributions/uniform.qbk
+++ b/doc/distributions/uniform.qbk
@@ -16,11 +16,11 @@
   public:
      typedef RealType value_type;
-     uniform_distribution(RealType lower = 0, RealType upper = 1); // Constructor.
+     BOOST_MATH_GPU_ENABLED uniform_distribution(RealType lower = 0, RealType upper = 1); // Constructor.
        : m_lower(lower), m_upper(upper) // Default is standard uniform distribution.
      // Accessor functions.
-     RealType lower()const;
-     RealType upper()const;
+     BOOST_MATH_GPU_ENABLED RealType lower()const;
+     BOOST_MATH_GPU_ENABLED RealType upper()const;
   }; // class uniform_distribution
   }} // namespaces
@@ -66,7 +66,7 @@ Likewise for the CDF:
[h4 Member Functions]
-   uniform_distribution(RealType lower = 0, RealType upper = 1);
+   BOOST_MATH_GPU_ENABLED uniform_distribution(RealType lower = 0, RealType upper = 1);
Constructs a [@http://en.wikipedia.org/wiki/uniform_distribution
uniform distribution] with lower /lower/ (a) and upper /upper/ (b).
@@ -74,11 +74,11 @@ uniform distribution] with lower /lower/ (a) and upper /upper/ (b).
Requires that the /lower/ and /upper/ parameters are both finite;
otherwise if infinity or NaN then calls __domain_error.
-   RealType lower()const;
+   BOOST_MATH_GPU_ENABLED RealType lower()const;
Returns the /lower/ parameter of this distribution.
-   RealType upper()const;
+   BOOST_MATH_GPU_ENABLED RealType upper()const;
Returns the /upper/ parameter of this distribution.
@@ -86,6 +86,8 @@ Returns the /upper/ parameter of this distribution.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is any finite value,
but the supported range is only /lower/ <= x <= /upper/.

diff --git a/doc/distributions/weibull.qbk b/doc/distributions/weibull.qbk
index 95c9e461e2..37139ab2c6 100644
--- a/doc/distributions/weibull.qbk
+++ b/doc/distributions/weibull.qbk
@@ -17,10 +17,10 @@
      typedef RealType value_type;
      typedef Policy policy_type;
      // Construct:
-     weibull_distribution(RealType shape, RealType scale = 1)
+     BOOST_MATH_GPU_ENABLED weibull_distribution(RealType shape, RealType scale = 1)
      // Accessors:
-     RealType shape()const;
-     RealType scale()const;
+     BOOST_MATH_GPU_ENABLED RealType shape()const;
+     BOOST_MATH_GPU_ENABLED RealType scale()const;
   };
   }} // namespaces
@@ -65,7 +65,7 @@ Samuel Kotz & Saralees Nadarajah].
[h4 Member Functions]
-   weibull_distribution(RealType shape, RealType scale = 1);
+   BOOST_MATH_GPU_ENABLED weibull_distribution(RealType shape, RealType scale = 1);
Constructs a [@http://en.wikipedia.org/wiki/Weibull_distribution
Weibull distribution] with shape /shape/ and scale /scale/.
@@ -73,11 +73,11 @@ Weibull distribution] with shape /shape/ and scale /scale/.
Requires that the /shape/ and /scale/ parameters are both greater than zero,
otherwise calls __domain_error.
-   RealType shape()const;
+   BOOST_MATH_GPU_ENABLED RealType shape()const;
Returns the /shape/ parameter of this distribution.
-   RealType scale()const;
+   BOOST_MATH_GPU_ENABLED RealType scale()const;
Returns the /scale/ parameter of this distribution.
@@ -85,9 +85,14 @@ Returns the /scale/ parameter of this distribution.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \[0, [infin]\].
+In this distribution the implementations of both `logcdf` and `logpdf` are specialized
+to improve numerical accuracy.
+
[h4 Accuracy]
The Weibull distribution is implemented in terms of the
@@ -104,7 +109,9 @@ and /q = 1-p/.
[table
[[Function][Implementation Notes]]
[[pdf][Using the relation: pdf = [alpha][beta][super -[alpha] ]x[super [alpha] - 1] e[super -(x/beta)[super alpha]] ]]
+[[logpdf][log(pdf) = log([alpha]) - [alpha] * log([beta]) + log(x) * ([alpha]-1) - pow(x/[beta], [alpha]) ]]
[[cdf][Using the relation: p = -__expm1(-(x\/[beta])[super [alpha]]) ]]
+[[logcdf][log(cdf) = log1p(-exp(-pow(x / [beta], [alpha]))) ]]
[[cdf complement][Using the relation: q = e[super -(x\/[beta])[super [alpha]]] ]]
[[quantile][Using the relation: x = [beta] * (-__log1p(-p))[super 1\/[alpha]] ]]
[[quantile from the complement][Using the relation: x = [beta] * (-log(q))[super 1\/[alpha]] ]]

diff --git a/doc/equations/holtsmark_ref1.svg b/doc/equations/holtsmark_ref1.svg
new file mode 100644 index 0000000000..77e97352ec
[new SVG equation image: markup not shown]

diff --git a/doc/equations/landau_ref1.svg b/doc/equations/landau_ref1.svg
new file mode 100644 index 0000000000..19939a829f
[new SVG equation image: markup not shown]

diff --git a/doc/equations/mapairy_ref1.svg b/doc/equations/mapairy_ref1.svg
new file mode 100644 index 0000000000..f0052a3bac
[new SVG equation image: markup not shown]

diff --git a/doc/equations/saspoint5_ref1.svg b/doc/equations/saspoint5_ref1.svg
new file mode 100644 index 0000000000..b013598e24
[new SVG equation image: markup not shown]

diff --git a/doc/graphs/holtsmark_ccdf_accuracy_64.png b/doc/graphs/holtsmark_ccdf_accuracy_64.png
new file mode 100644 index 0000000000..e86f37a551
Binary files /dev/null and b/doc/graphs/holtsmark_ccdf_accuracy_64.png differ

diff --git a/doc/graphs/holtsmark_pdf1.svg b/doc/graphs/holtsmark_pdf1.svg
new file mode 100644 index 0000000000..a06288261b
[new SVG graph image (Matplotlib v3.9.0): markup not shown]

diff --git a/doc/graphs/holtsmark_pdf2.svg b/doc/graphs/holtsmark_pdf2.svg
new file mode 100644 index 0000000000..420075c9bf
[new SVG graph image (Matplotlib v3.9.0): markup not shown]

diff --git a/doc/graphs/holtsmark_pdf_accuracy_64.png b/doc/graphs/holtsmark_pdf_accuracy_64.png
new file mode 100644 index 0000000000..fae6407447
Binary files /dev/null and b/doc/graphs/holtsmark_pdf_accuracy_64.png differ

diff --git a/doc/graphs/landau_ccdf_accuracy_64.png b/doc/graphs/landau_ccdf_accuracy_64.png
new file mode 100644 index 0000000000..7e157eb81f
Binary files /dev/null and b/doc/graphs/landau_ccdf_accuracy_64.png differ

diff --git a/doc/graphs/landau_cdf_accuracy_64.png b/doc/graphs/landau_cdf_accuracy_64.png
new file mode 100644 index 0000000000..3c4a48eaeb
Binary files /dev/null and b/doc/graphs/landau_cdf_accuracy_64.png differ

diff --git a/doc/graphs/landau_pdf1.svg b/doc/graphs/landau_pdf1.svg
new file mode 100644 index 0000000000..f07e248b15
[new SVG graph image (Matplotlib v3.9.0): markup not shown]

diff --git a/doc/graphs/landau_pdf2.svg b/doc/graphs/landau_pdf2.svg
new file mode 100644 index 0000000000..a3a6f7feb1
[new SVG graph image (Matplotlib v3.9.0): markup not shown]

diff --git a/doc/graphs/landau_pdf_accuracy_64.png b/doc/graphs/landau_pdf_accuracy_64.png
new file mode 100644 index 0000000000..3bb84efec8
Binary files /dev/null and b/doc/graphs/landau_pdf_accuracy_64.png differ

diff --git a/doc/graphs/mapairy_ccdf_accuracy_64.png b/doc/graphs/mapairy_ccdf_accuracy_64.png
new file mode 100644 index 0000000000..8303bf8753
Binary files /dev/null and b/doc/graphs/mapairy_ccdf_accuracy_64.png differ

diff --git a/doc/graphs/mapairy_cdf_accuracy_64.png b/doc/graphs/mapairy_cdf_accuracy_64.png
new file mode 100644 index 0000000000..0576655466
Binary files /dev/null and b/doc/graphs/mapairy_cdf_accuracy_64.png differ

diff --git a/doc/graphs/mapairy_pdf1.svg b/doc/graphs/mapairy_pdf1.svg
new file mode 100644 index 0000000000..1d240be465
[new SVG graph image (Matplotlib v3.9.0): markup not shown]

diff --git a/doc/graphs/mapairy_pdf2.svg b/doc/graphs/mapairy_pdf2.svg
new file mode 100644 index 0000000000..c77c269bed
[new SVG graph image (Matplotlib v3.9.0): markup not shown]

diff --git a/doc/graphs/mapairy_pdf_accuracy_64.png b/doc/graphs/mapairy_pdf_accuracy_64.png
new file mode 100644 index 0000000000..45b1a1e828
Binary files /dev/null and b/doc/graphs/mapairy_pdf_accuracy_64.png differ

diff --git a/doc/graphs/saspoint5_ccdf_accuracy_64.png b/doc/graphs/saspoint5_ccdf_accuracy_64.png
new file mode 100644 index 0000000000..a1036160f4
Binary files /dev/null and b/doc/graphs/saspoint5_ccdf_accuracy_64.png differ

diff --git a/doc/graphs/saspoint5_pdf1.svg b/doc/graphs/saspoint5_pdf1.svg
new file mode 100644 index 0000000000..7c820ae225
[new SVG graph image (Matplotlib v3.9.0): markup not shown]

diff --git a/doc/graphs/saspoint5_pdf2.svg b/doc/graphs/saspoint5_pdf2.svg
new file mode 100644 index 0000000000..dbd503ce7f
[new SVG graph image (Matplotlib v3.9.0): markup not shown]

diff --git a/doc/graphs/saspoint5_pdf_accuracy_64.png b/doc/graphs/saspoint5_pdf_accuracy_64.png
new file mode 100644 index 0000000000..e1ecd8f509
Binary files /dev/null and b/doc/graphs/saspoint5_pdf_accuracy_64.png differ

diff --git a/doc/html/math_toolkit/dist_ref/dists/cauchy_dist.html b/doc/html/math_toolkit/dist_ref/dists/cauchy_dist.html
index 9328fa44b9..79f041a239 100644
---
a/doc/html/math_toolkit/dist_ref/dists/cauchy_dist.html +++ b/doc/html/math_toolkit/dist_ref/dists/cauchy_dist.html @@ -221,7 +221,7 @@
Substituting into the above we get:

-              p = -atan(1/x) ; x < 0
+              p = -atan(1/x)/π ; x < 0
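[Note, not part of the patch: the corrected line can be checked numerically. For the standard Cauchy distribution F(x) = 1/2 + atan(x)/π, and the identity atan(x) + atan(1/x) = -π/2 for x < 0 gives F(x) = -atan(1/x)/π exactly. A minimal host-side sketch:]

    #include <boost/math/distributions/cauchy.hpp>
    #include <cmath>
    #include <iostream>

    int main()
    {
        const boost::math::cauchy_distribution<double> dist; // location 0, scale 1
        const double pi = 3.141592653589793;
        for (double x : {-0.5, -2.0, -10.0})
        {
            const double p = -std::atan(1.0 / x) / pi; // corrected formula, valid for x < 0
            std::cout << p - cdf(dist, x) << '\n';     // differences at rounding level
        }
    }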

So the procedure is to calculate the cdf for -fabs(x) using the
diff --git a/doc/math.qbk b/doc/math.qbk
index 4b8804dbbb..385c93a5e8 100644
--- a/doc/math.qbk
+++ b/doc/math.qbk
@@ -424,7 +424,7 @@ and use the function's name as the link text.]
 
 [def __usual_accessors __cdf, __pdf, __quantile, __hazard,
-   __chf, __mean, __median, __mode, __variance, __sd, __skewness,
+   __chf, __logcdf, __logpdf, __mean, __median, __mode, __variance, __sd, __skewness,
    __kurtosis, __kurtosis_excess, __range and __support]
 
 [def __real_concept [link math_toolkit.real_concepts real concept]]
@@ -557,6 +557,7 @@ and as a CD ISBN 0-9504833-2-X 978-0-9504833-2-0, Classification 519.2-dc22.
 [include overview/standalone.qbk]
 [include overview/result_type_calc.qbk]
 [include overview/error_handling.qbk]
+[include overview/gpu.qbk]
 
 [section:compilers_overview Compilers]
 [compilers_overview]
diff --git a/doc/overview/gpu.qbk b/doc/overview/gpu.qbk
new file mode 100644
index 0000000000..7fb27e645e
--- /dev/null
+++ b/doc/overview/gpu.qbk
@@ -0,0 +1,67 @@
+[section:gpu Support for GPU programming in Boost.Math]
+
+[h4 GPU Support]
+
+Selected functions, distributions, tools, etc. support running on both host and device.
+These functions are annotated with `BOOST_MATH_GPU_ENABLED` or `BOOST_MATH_CUDA_ENABLED` in their individual documentation.
+Functions marked with `BOOST_MATH_GPU_ENABLED` are tested using CUDA (both NVCC and NVRTC) as well as SYCL, to provide a wide range of support.
+A small number of functions are marked with `BOOST_MATH_CUDA_ENABLED`; because of restrictions in their implementations, SYCL is unsupported for these.
+
+[h4 Policies]
+
+Because exceptions cannot be thrown on device, the default policy on all devices is to ignore errors.
+A user can specify their own policy as usual, but it will be ignored when the code is run on device.
+
+[h4 How to build with device support]
+
+When compiling with CUDA or SYCL you will have to ensure that your code is run inside a kernel function.
+It is not enough to simply compile existing code with the NVCC compiler to run it on the device.
+A simple CUDA kernel that runs the Beta distribution CDF under NVCC would be:
+
+    __global__ void cuda_beta_dist(const double* in, double* out, int num_elements)
+    {
+        const int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+        if (i < num_elements)
+        {
+            out[i] = cdf(boost::math::beta_distribution<double>(), in[i]);
+        }
+    }
+
+And with CUDA on NVRTC:
+
+    const char* cuda_kernel = R"(
+    #include <boost/math/distributions/beta.hpp>
+    extern "C" __global__
+    void test_beta_dist_kernel(const double* in, double* out, int num_elements)
+    {
+        const int i = blockDim.x * blockIdx.x + threadIdx.x;
+        if (i < num_elements)
+        {
+            out[i] = boost::math::cdf(boost::math::beta_distribution<double>(), in[i]);
+        }
+    }
+    )";
+
+And lastly on SYCL:
+
+    void sycl_beta_dist(const double* in, double* out, int num_elements, sycl::queue& q)
+    {
+        q.submit([&](sycl::handler& h) {
+            h.parallel_for(sycl::range<1>(num_elements), [=](sycl::id<1> i) {
+                out[i] = boost::math::cdf(boost::math::beta_distribution<double>(), in[i]);
+            });
+        });
+    }
+
+Once your kernel function has been written, use your framework's usual mechanism to launch it.
+
+[endsect] [/section:gpu Support for GPU programming in Boost.Math]
+
+[/
+  Copyright 2024. Matt Borland
+  Distributed under the Boost Software License, Version 1.0.
+  (See accompanying file LICENSE_1_0.txt or copy at
+  http://www.boost.org/LICENSE_1_0.txt).
+]
+
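[Note, not part of the patch: once the `cuda_beta_dist` kernel above is compiled with NVCC, a host-side launch might look like the following minimal sketch; the buffer names and launch dimensions are arbitrary:]

    #include <cuda_runtime.h>

    void run_beta_cdf(const double* host_in, double* host_out, int n)
    {
        double *d_in, *d_out;
        cudaMalloc(&d_in, n * sizeof(double));
        cudaMalloc(&d_out, n * sizeof(double));
        cudaMemcpy(d_in, host_in, n * sizeof(double), cudaMemcpyHostToDevice);

        // One thread per element; round the grid size up
        const int block = 256;
        const int grid = (n + block - 1) / block;
        cuda_beta_dist<<<grid, block>>>(d_in, d_out, n);

        cudaMemcpy(host_out, d_out, n * sizeof(double), cudaMemcpyDeviceToHost);
        cudaFree(d_in);
        cudaFree(d_out);
    }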
diff --git a/doc/quadrature/double_exponential.qbk b/doc/quadrature/double_exponential.qbk
index b4649adbc6..2959b94cd3 100644
--- a/doc/quadrature/double_exponential.qbk
+++ b/doc/quadrature/double_exponential.qbk
@@ -1,5 +1,6 @@
 [/
 Copyright (c) 2017 Nick Thompson
+Copyright (c) 2024 Matt Borland
 Use, modification and distribution are subject to the
 Boost Software License, Version 1.0. (See accompanying file
 LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -538,6 +539,30 @@ This form integrates just fine over (-log([pi]/2), +[infin]) using either the `t
 
 [endsect] [/section:de_caveats Caveats]
 
+[section:gpu_usage GPU Usage]
+
+``
+    #include <boost/math/quadrature/exp_sinh.hpp>
+
+    namespace boost{ namespace math{ namespace quadrature {
+
+    template <class F, class Real, class Policy = policies::policy<> >
+    __device__ auto exp_sinh_integrate(const F& f, Real a, Real b, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
+
+    template <class F, class Real, class Policy = policies::policy<> >
+    __device__ auto exp_sinh_integrate(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
+
+}}}
+``
+
+Quadrature is additionally able to run on CUDA (NVCC and NVRTC) platforms.
+The major difference is outlined in the above function signatures:
+on device these are free-standing functions rather than member functions of an integrator object, as they are on the host.
+The tables of abscissas and weights are stored in shared read-only memory on the device, instead of being initialized when the class is constructed.
+An example use case would be computing a stiffness matrix in the finite element method, since that involves integrating many different functions.
+
+[endsect] [/section:gpu_usage GPU Usage]
+
 [section:de_refes References]
 
 * Hidetosi Takahasi and Masatake Mori, ['Double Exponential Formulas for Numerical Integration]
 Publ. Res. Inst. Math. Sci., 9 (1974), pp. 721-741.
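[Note, not part of the patch: a minimal sketch of how the device-side `exp_sinh_integrate` overload above might be called from a CUDA kernel. The kernel name, integrand, and tolerance are hypothetical:]

    __global__ void integrate_kernel(double* out, int num_elements)
    {
        const int i = blockDim.x * blockIdx.x + threadIdx.x;
        if (i < num_elements)
        {
            // Integrand exp(-t^2); the overload without endpoints integrates
            // over the half-infinite interval (0, +inf).
            auto f = [](double t) { return exp(-t * t); };
            double error;
            double L1;
            boost::math::size_t levels;
            out[i] = boost::math::quadrature::exp_sinh_integrate(f, 1e-9, &error, &L1, &levels);
        }
    }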
diff --git a/doc/roots/roots.qbk b/doc/roots/roots.qbk
index a229300690..ea347639b9 100644
--- a/doc/roots/roots.qbk
+++ b/doc/roots/roots.qbk
@@ -1,4 +1,4 @@
-[section:roots_deriv Root Finding With Derivatives: Newton-Raphson, Halley & Schr'''ö'''der]
+[section:roots_deriv Root Finding With Derivatives: Newton-Raphson, Halley & Schroeder]
 
 [h4 Synopsis]
 
@@ -10,10 +10,10 @@ namespace tools { // Note namespace boost::math::tools.
    // Newton-Raphson
    template <class F, class T>
-   T newton_raphson_iterate(F f, T guess, T min, T max, int digits);
+   BOOST_MATH_GPU_ENABLED T newton_raphson_iterate(F f, T guess, T min, T max, int digits);
 
    template <class F, class T>
-   T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& max_iter);
+   BOOST_MATH_GPU_ENABLED T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& max_iter);
 
    // Halley
    template <class F, class T>
@@ -22,7 +22,7 @@
    T halley_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& max_iter);
 
-   // Schr'''ö'''der
+   // Schroeder
    template <class F, class T>
    T schroder_iterate(F f, T guess, T min, T max, int digits);
 
@@ -61,7 +61,7 @@ For second-order iterative method ([@http://en.wikipedia.org/wiki/Newton_Raphson
 For the third-order methods ([@http://en.wikipedia.org/wiki/Halley%27s_method Halley] and
-Schr'''ö'''der)
+Schroeder)
 the `tuple` should have [*three] elements containing the evaluation of the function
 and its first and second derivatives.]]
 [[T guess] [The initial starting value. A good guess is crucial to quick convergence!]]
@@ -147,7 +147,7 @@ Out of bounds steps revert to bisection of the current bounds.
 
 Under ideal conditions, the number of correct digits trebles with each iteration.
 
-[h4:schroder Schr'''ö'''der's Method]
+[h4:schroder Schroeder's Method]
 
 Given an initial guess x0 the subsequent values are computed using:
@@ -162,8 +162,8 @@ Out of bounds steps revert to __bisection_wikipedia of the current bounds.
 
 Under ideal conditions, the number of correct digits trebles with each iteration.
 
-This is Schr'''ö'''der's general result (equation 18 from [@http://drum.lib.umd.edu/handle/1903/577 Stewart, G. W.
-"On Infinitely Many Algorithms for Solving Equations." English translation of Schr'''ö'''der's original paper.
+This is Schroeder's general result (equation 18 from [@http://drum.lib.umd.edu/handle/1903/577 Stewart, G. W.
+"On Infinitely Many Algorithms for Solving Equations." English translation of Schroeder's original paper.
 College Park, MD: University of Maryland, Institute for Advanced Computer Studies, Department of Computer Science, 1993].)
 This method guarantees at least quadratic convergence (the same as Newton's method), and is known
 to work well in the presence of multiple roots:
diff --git a/doc/sf/airy.qbk b/doc/sf/airy.qbk
index 5ff4c7cb5e..4756bee2d8 100644
--- a/doc/sf/airy.qbk
+++ b/doc/sf/airy.qbk
@@ -18,10 +18,10 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 namespace boost { namespace math {
 
 template <class T>
-   ``__sf_result`` airy_ai(T x);
+   BOOST_MATH_GPU_ENABLED ``__sf_result`` airy_ai(T x);
 
 template <class T, class Policy>
-   ``__sf_result`` airy_ai(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED ``__sf_result`` airy_ai(T x, const Policy&);
 
 }} // namespaces
@@ -78,10 +78,10 @@ This function is implemented in terms of the Bessel functions using the relation
 namespace boost { namespace math {
 
 template <class T>
-   ``__sf_result`` airy_bi(T x);
+   BOOST_MATH_GPU_ENABLED ``__sf_result`` airy_bi(T x);
 
 template <class T, class Policy>
-   ``__sf_result`` airy_bi(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED ``__sf_result`` airy_bi(T x, const Policy&);
 
 }} // namespaces
@@ -132,10 +132,10 @@ This function is implemented in terms of the Bessel functions using the relation
 namespace boost { namespace math {
 
 template <class T>
-   ``__sf_result`` airy_ai_prime(T x);
+   BOOST_MATH_GPU_ENABLED ``__sf_result`` airy_ai_prime(T x);
 
 template <class T, class Policy>
-   ``__sf_result`` airy_ai_prime(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED ``__sf_result`` airy_ai_prime(T x, const Policy&);
 
 }} // namespaces
@@ -186,10 +186,10 @@ This function is implemented in terms of the Bessel functions using the relation
 namespace boost { namespace math {
 
 template <class T>
-   ``__sf_result`` airy_bi_prime(T x);
+   BOOST_MATH_GPU_ENABLED ``__sf_result`` airy_bi_prime(T x);
 
 template <class T, class Policy>
-   ``__sf_result`` airy_bi_prime(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED ``__sf_result`` airy_bi_prime(T x, const Policy&);
 
 }} // namespaces
@@ -242,23 +242,23 @@ by providing an output iterator.
 
 The signature of the single value functions are:
 
 template <class T>
-   T airy_ai_zero(
+   BOOST_MATH_GPU_ENABLED T airy_ai_zero(
          int m); // 1-based index of zero.
 
 template <class T>
-   T airy_bi_zero(
+   BOOST_MATH_GPU_ENABLED T airy_bi_zero(
         int m); // 1-based index of zero.
 
 and for multiple zeros:
 
 template <class T, class OutputIterator>
-   OutputIterator airy_ai_zero(
+   BOOST_MATH_GPU_ENABLED OutputIterator airy_ai_zero(
                      int start_index, // 1-based index of first zero.
                      unsigned number_of_zeros, // How many zeros to generate.
                      OutputIterator out_it); // Destination for zeros.
 
 template <class T, class OutputIterator>
-   OutputIterator airy_bi_zero(
+   BOOST_MATH_GPU_ENABLED OutputIterator airy_bi_zero(
                      int start_index, // 1-based index of zero.
                      unsigned number_of_zeros, // How many zeros to generate
                      OutputIterator out_it); // Destination for zeros.
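[Note, not part of the patch: the zero-finding interface above is unchanged apart from the annotations; for reference, a minimal host-side usage sketch:]

    #include <boost/math/special_functions/airy.hpp>
    #include <iostream>
    #include <iterator>

    int main()
    {
        // First zero of Ai, approximately -2.33811
        std::cout << boost::math::airy_ai_zero<double>(1) << '\n';

        // First five zeros of Bi, written straight to stdout
        boost::math::airy_bi_zero<double>(1, 5, std::ostream_iterator<double>(std::cout, "\n"));
    }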
@@ -266,25 +266,25 @@ and for multiple zeros: There are also versions which allow control of the __policy_section for error handling and precision. template - T airy_ai_zero( + BOOST_MATH_GPU_ENABLED T airy_ai_zero( int m, // 1-based index of zero. const Policy&); // Policy to use. template - T airy_bi_zero( + BOOST_MATH_GPU_ENABLED T airy_bi_zero( int m, // 1-based index of zero. const Policy&); // Policy to use. template - OutputIterator airy_ai_zero( + BOOST_MATH_GPU_ENABLED OutputIterator airy_ai_zero( int start_index, // 1-based index of first zero. unsigned number_of_zeros, // How many zeros to generate. OutputIterator out_it, // Destination for zeros. const Policy& pol); // Policy to use. template - OutputIterator airy_bi_zero( + BOOST_MATH_GPU_ENABLED OutputIterator airy_bi_zero( int start_index, // 1-based index of zero. unsigned number_of_zeros, // How many zeros to generate. OutputIterator out_it, // Destination for zeros. diff --git a/doc/sf/bessel_ik.qbk b/doc/sf/bessel_ik.qbk index d044ac7b80..9fa4e63a74 100644 --- a/doc/sf/bessel_ik.qbk +++ b/doc/sf/bessel_ik.qbk @@ -5,16 +5,16 @@ `#include ` template - ``__sf_result`` cyl_bessel_i(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` cyl_bessel_i(T1 v, T2 x); template - ``__sf_result`` cyl_bessel_i(T1 v, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` cyl_bessel_i(T1 v, T2 x, const ``__Policy``&); template - ``__sf_result`` cyl_bessel_k(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` cyl_bessel_k(T1 v, T2 x); template - ``__sf_result`` cyl_bessel_k(T1 v, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` cyl_bessel_k(T1 v, T2 x, const ``__Policy``&); [h4 Description] diff --git a/doc/sf/bessel_jy.qbk b/doc/sf/bessel_jy.qbk index 1f43bc7580..faf8788500 100644 --- a/doc/sf/bessel_jy.qbk +++ b/doc/sf/bessel_jy.qbk @@ -5,16 +5,16 @@ `#include ` template - ``__sf_result`` cyl_bessel_j(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` cyl_bessel_j(T1 v, T2 x); template - ``__sf_result`` cyl_bessel_j(T1 v, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` cyl_bessel_j(T1 v, T2 x, const ``__Policy``&); template - ``__sf_result`` cyl_neumann(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` cyl_neumann(T1 v, T2 x); template - ``__sf_result`` cyl_neumann(T1 v, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` cyl_neumann(T1 v, T2 x, const ``__Policy``&); [h4 Description] diff --git a/doc/sf/bessel_spherical.qbk b/doc/sf/bessel_spherical.qbk index e9cda89c70..eb1fa69154 100644 --- a/doc/sf/bessel_spherical.qbk +++ b/doc/sf/bessel_spherical.qbk @@ -5,16 +5,16 @@ `#include ` template - ``__sf_result`` sph_bessel(unsigned v, T2 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` sph_bessel(unsigned v, T2 x); template - ``__sf_result`` sph_bessel(unsigned v, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` sph_bessel(unsigned v, T2 x, const ``__Policy``&); template - ``__sf_result`` sph_neumann(unsigned v, T2 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` sph_neumann(unsigned v, T2 x); template - ``__sf_result`` sph_neumann(unsigned v, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` sph_neumann(unsigned v, T2 x, const ``__Policy``&); [h4 Description] diff --git a/doc/sf/beta.qbk b/doc/sf/beta.qbk index e332fa5030..7e1904c254 100644 --- a/doc/sf/beta.qbk +++ b/doc/sf/beta.qbk @@ -9,10 +9,10 @@ namespace boost{ namespace math{ template - ``__sf_result`` beta(T1 a, T2 b); + BOOST_MATH_GPU_ENABLED ``__sf_result`` beta(T1 
a, T2 b); template - ``__sf_result`` beta(T1 a, T2 b, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` beta(T1 a, T2 b, const ``__Policy``&); }} // namespaces diff --git a/doc/sf/beta_derivative.qbk b/doc/sf/beta_derivative.qbk index 8606d6f2b3..5d3b9a13ef 100644 --- a/doc/sf/beta_derivative.qbk +++ b/doc/sf/beta_derivative.qbk @@ -9,10 +9,10 @@ namespace boost{ namespace math{ template - ``__sf_result`` ibeta_derivative(T1 a, T2 b, T3 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_derivative(T1 a, T2 b, T3 x); template - ``__sf_result`` ibeta_derivative(T1 a, T2 b, T3 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_derivative(T1 a, T2 b, T3 x, const ``__Policy``&); }} // namespaces diff --git a/doc/sf/digamma.qbk b/doc/sf/digamma.qbk index c88c5fe7b0..78b68403d8 100644 --- a/doc/sf/digamma.qbk +++ b/doc/sf/digamma.qbk @@ -9,10 +9,10 @@ namespace boost{ namespace math{ template - ``__sf_result`` digamma(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` digamma(T z); template - ``__sf_result`` digamma(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` digamma(T z, const ``__Policy``&); }} // namespaces diff --git a/doc/sf/ellint_carlson.qbk b/doc/sf/ellint_carlson.qbk index ca39cd6bef..db45697463 100644 --- a/doc/sf/ellint_carlson.qbk +++ b/doc/sf/ellint_carlson.qbk @@ -17,10 +17,10 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) namespace boost { namespace math { template - ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z) template - ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z, const ``__Policy``&) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z, const ``__Policy``&) }} // namespaces @@ -32,10 +32,10 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) namespace boost { namespace math { template - ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z) template - ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z, const ``__Policy``&) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z, const ``__Policy``&) }} // namespaces @@ -47,10 +47,10 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) namespace boost { namespace math { template - ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p) template - ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p, const ``__Policy``&) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p, const ``__Policy``&) }} // namespaces @@ -62,10 +62,10 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) namespace boost { namespace math { template - ``__sf_result`` ellint_rc(T1 x, T2 y) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rc(T1 x, T2 y) template - ``__sf_result`` ellint_rc(T1 x, T2 y, const ``__Policy``&) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rc(T1 x, T2 y, const ``__Policy``&) }} // namespaces @@ -76,10 +76,10 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) namespace boost { namespace math { template - ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z) template - ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z, const ``__Policy``&) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z, const ``__Policy``&) }} // namespaces @@ -98,10 +98,10 @@ when the arguments are of different types: otherwise the 
return is the same type as the arguments. template - ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z) template - ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z, const ``__Policy``&) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z, const ``__Policy``&) Returns Carlson's Elliptic Integral ['R[sub F]]: @@ -113,10 +113,10 @@ one may be zero. Otherwise returns the result of __domain_error. [optional_policy] template - ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z) template - ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z, const ``__Policy``&) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z, const ``__Policy``&) Returns Carlson's elliptic integral R[sub D]: @@ -128,10 +128,10 @@ zero, and that z >= 0. Otherwise returns the result of __domain_error. [optional_policy] template - ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p) template - ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p, const ``__Policy``&) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p, const ``__Policy``&) Returns Carlson's elliptic integral R[sub J]: @@ -149,10 +149,10 @@ using the relation: [equation ellint17] template - ``__sf_result`` ellint_rc(T1 x, T2 y) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rc(T1 x, T2 y) template - ``__sf_result`` ellint_rc(T1 x, T2 y, const ``__Policy``&) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rc(T1 x, T2 y, const ``__Policy``&) Returns Carlson's elliptic integral R[sub C]: @@ -170,10 +170,10 @@ using the relation: [equation ellint18] template - ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z) template - ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z, const ``__Policy``&) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z, const ``__Policy``&) Returns Carlson's elliptic integral ['R[sub G]:] diff --git a/doc/sf/ellint_legendre.qbk b/doc/sf/ellint_legendre.qbk index c780a9b019..50b633af9f 100644 --- a/doc/sf/ellint_legendre.qbk +++ b/doc/sf/ellint_legendre.qbk @@ -17,16 +17,16 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) namespace boost { namespace math { template - ``__sf_result`` ellint_1(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T1 k, T2 phi); template - ``__sf_result`` ellint_1(T1 k, T2 phi, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T1 k, T2 phi, const ``__Policy``&); template - ``__sf_result`` ellint_1(T k); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T k); template - ``__sf_result`` ellint_1(T k, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T k, const ``__Policy``&); }} // namespaces @@ -42,10 +42,10 @@ when T1 and T2 are different types: when they are the same type then the result is the same type as the arguments. 
template - ``__sf_result`` ellint_1(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T1 k, T2 phi); template - ``__sf_result`` ellint_1(T1 k, T2 phi, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T1 k, T2 phi, const ``__Policy``&); Returns the incomplete elliptic integral of the first kind ['F([phi], k)]: @@ -56,10 +56,10 @@ Requires k[super 2]sin[super 2](phi) < 1, otherwise returns the result of __doma [optional_policy] template - ``__sf_result`` ellint_1(T k); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T k); template - ``__sf_result`` ellint_1(T k, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T k, const ``__Policy``&); Returns the complete elliptic integral of the first kind ['K(k)]: @@ -123,16 +123,16 @@ and namespace boost { namespace math { template - ``__sf_result`` ellint_2(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T1 k, T2 phi); template - ``__sf_result`` ellint_2(T1 k, T2 phi, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T1 k, T2 phi, const ``__Policy``&); template - ``__sf_result`` ellint_2(T k); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T k); template - ``__sf_result`` ellint_2(T k, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T k, const ``__Policy``&); }} // namespaces @@ -148,10 +148,10 @@ when T1 and T2 are different types: when they are the same type then the result is the same type as the arguments. template - ``__sf_result`` ellint_2(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T1 k, T2 phi); template - ``__sf_result`` ellint_2(T1 k, T2 phi, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T1 k, T2 phi, const ``__Policy``&); Returns the incomplete elliptic integral of the second kind ['E([phi], k)]: @@ -162,10 +162,10 @@ Requires k[super 2]sin[super 2](phi) < 1, otherwise returns the result of __doma [optional_policy] template - ``__sf_result`` ellint_2(T k); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T k); template - ``__sf_result`` ellint_2(T k, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T k, const ``__Policy``&); Returns the complete elliptic integral of the second kind ['E(k)]: @@ -230,16 +230,16 @@ and namespace boost { namespace math { template - ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi); + BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi); template - ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi, const ``__Policy``&); + BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi, const ``__Policy``&); template - ``__sf_result`` ellint_3(T1 k, T2 n); + BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n); template - ``__sf_result`` ellint_3(T1 k, T2 n, const ``__Policy``&); + BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n, const ``__Policy``&); }} // namespaces @@ -255,10 +255,10 @@ when the arguments are of different types: when they are the same type then the is the same type as the arguments. template - ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi); + BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi); template - ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi, const ``__Policy``&); + BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi, const ``__Policy``&); Returns the incomplete elliptic integral of the third kind ['[Pi](n, [phi], k)]: @@ -271,10 +271,10 @@ would be complex). 
[optional_policy] template - ``__sf_result`` ellint_3(T1 k, T2 n); + BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n); template - ``__sf_result`` ellint_3(T1 k, T2 n, const ``__Policy``&); + BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n, const ``__Policy``&); Returns the complete elliptic integral of the first kind ['[Pi](n, k)]: @@ -355,16 +355,16 @@ and namespace boost { namespace math { template - ``__sf_result`` ellint_d(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k, T2 phi); template - ``__sf_result`` ellint_d(T1 k, T2 phi, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k, T2 phi, const ``__Policy``&); template - ``__sf_result`` ellint_d(T1 k); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k); template - ``__sf_result`` ellint_d(T1 k, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k, const ``__Policy``&); }} // namespaces @@ -378,10 +378,10 @@ when the arguments are of different types: when they are the same type then the is the same type as the arguments. template - ``__sf_result`` ellint_d(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k, T2 phi); template - ``__sf_result`` ellint_3(T1 k, T2 phi, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_3(T1 k, T2 phi, const ``__Policy``&); Returns the incomplete elliptic integral: @@ -394,10 +394,10 @@ would be complex). [optional_policy] template - ``__sf_result`` ellint_d(T1 k); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k); template - ``__sf_result`` ellint_d(T1 k, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k, const ``__Policy``&); Returns the complete elliptic integral ['D(k) = D([pi]/2, k)] @@ -463,10 +463,10 @@ using the relation: namespace boost { namespace math { template - ``__sf_result`` jacobi_zeta(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED ``__sf_result`` jacobi_zeta(T1 k, T2 phi); template - ``__sf_result`` jacobi_zeta(T1 k, T2 phi, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` jacobi_zeta(T1 k, T2 phi, const ``__Policy``&); }} // namespaces @@ -543,10 +543,10 @@ is [@../../example/jacobi_zeta_example.cpp jacobi_zeta_example.cpp]. namespace boost { namespace math { template - ``__sf_result`` heuman_lambda(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED ``__sf_result`` heuman_lambda(T1 k, T2 phi); template - ``__sf_result`` heuman_lambda(T1 k, T2 phi, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` heuman_lambda(T1 k, T2 phi, const ``__Policy``&); }} // namespaces diff --git a/doc/sf/erf.qbk b/doc/sf/erf.qbk index 3207b66c07..5f6bdf9fa5 100644 --- a/doc/sf/erf.qbk +++ b/doc/sf/erf.qbk @@ -9,16 +9,16 @@ namespace boost{ namespace math{ template - ``__sf_result`` erf(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erf(T z); template - ``__sf_result`` erf(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erf(T z, const ``__Policy``&); template - ``__sf_result`` erfc(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erfc(T z); template - ``__sf_result`` erfc(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erfc(T z, const ``__Policy``&); }} // namespaces @@ -30,10 +30,10 @@ the return type is `double` if T is an integer type, and T otherwise. 
[h4 Description] template - ``__sf_result`` erf(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erf(T z); template - ``__sf_result`` erf(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erf(T z, const ``__Policy``&); Returns the [@http://en.wikipedia.org/wiki/Error_function error function] [@http://functions.wolfram.com/GammaBetaErf/Erf/ erf] of z: @@ -43,10 +43,10 @@ Returns the [@http://en.wikipedia.org/wiki/Error_function error function] [graph erf] template - ``__sf_result`` erfc(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erfc(T z); template - ``__sf_result`` erfc(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erfc(T z, const ``__Policy``&); Returns the complement of the [@http://functions.wolfram.com/GammaBetaErf/Erfc/ error function] of z: diff --git a/doc/sf/erf_inv.qbk b/doc/sf/erf_inv.qbk index 729ec22d28..e8f7464e09 100644 --- a/doc/sf/erf_inv.qbk +++ b/doc/sf/erf_inv.qbk @@ -9,16 +9,16 @@ namespace boost{ namespace math{ template - ``__sf_result`` erf_inv(T p); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erf_inv(T p); template - ``__sf_result`` erf_inv(T p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erf_inv(T p, const ``__Policy``&); template - ``__sf_result`` erfc_inv(T p); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erfc_inv(T p); template - ``__sf_result`` erfc_inv(T p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erfc_inv(T p, const ``__Policy``&); }} // namespaces @@ -30,10 +30,10 @@ the return type is `double` if T is an integer type, and T otherwise. [h4 Description] template - ``__sf_result`` erf_inv(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erf_inv(T z); template - ``__sf_result`` erf_inv(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erf_inv(T z, const ``__Policy``&); Returns the [@http://functions.wolfram.com/GammaBetaErf/InverseErf/ inverse error function] of z, that is a value x such that: @@ -43,10 +43,10 @@ of z, that is a value x such that: [graph erf_inv] template - ``__sf_result`` erfc_inv(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erfc_inv(T z); template - ``__sf_result`` erfc_inv(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erfc_inv(T z, const ``__Policy``&); Returns the inverse of the complement of the error function of z, that is a value x such that: diff --git a/doc/sf/expint.qbk b/doc/sf/expint.qbk index 89554730d5..f0abf090e7 100644 --- a/doc/sf/expint.qbk +++ b/doc/sf/expint.qbk @@ -11,10 +11,10 @@ namespace boost{ namespace math{ template - ``__sf_result`` expint(unsigned n, T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` expint(unsigned n, T z); template - ``__sf_result`` expint(unsigned n, T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` expint(unsigned n, T z, const ``__Policy``&); }} // namespaces @@ -26,10 +26,10 @@ the return type is `double` if T is an integer type, and T otherwise. [h4 Description] template - ``__sf_result`` expint(unsigned n, T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` expint(unsigned n, T z); template - ``__sf_result`` expint(unsigned n, T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` expint(unsigned n, T z, const ``__Policy``&); Returns the [@http://mathworld.wolfram.com/En-Function.html exponential integral En] of z: @@ -100,10 +100,10 @@ is used. 
namespace boost{ namespace math{ template - ``__sf_result`` expint(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` expint(T z); template - ``__sf_result`` expint(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` expint(T z, const ``__Policy``&); }} // namespaces @@ -115,10 +115,10 @@ the return type is `double` if T is an integer type, and T otherwise. [h4 Description] template - ``__sf_result`` expint(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` expint(T z); template - ``__sf_result`` expint(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` expint(T z, const ``__Policy``&); Returns the [@http://mathworld.wolfram.com/ExponentialIntegral.html exponential integral] of z: diff --git a/doc/sf/gamma_derivatives.qbk b/doc/sf/gamma_derivatives.qbk index c7dd248799..1b578d8d98 100644 --- a/doc/sf/gamma_derivatives.qbk +++ b/doc/sf/gamma_derivatives.qbk @@ -9,10 +9,10 @@ namespace boost{ namespace math{ template - ``__sf_result`` gamma_p_derivative(T1 a, T2 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p_derivative(T1 a, T2 x); template - ``__sf_result`` gamma_p_derivative(T1 a, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p_derivative(T1 a, T2 x, const ``__Policy``&); }} // namespaces diff --git a/doc/sf/gamma_ratios.qbk b/doc/sf/gamma_ratios.qbk index a3fcf864cb..0d076890d3 100644 --- a/doc/sf/gamma_ratios.qbk +++ b/doc/sf/gamma_ratios.qbk @@ -7,26 +7,26 @@ namespace boost{ namespace math{ template - ``__sf_result`` tgamma_ratio(T1 a, T2 b); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_ratio(T1 a, T2 b); template - ``__sf_result`` tgamma_ratio(T1 a, T2 b, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_ratio(T1 a, T2 b, const ``__Policy``&); template - ``__sf_result`` tgamma_delta_ratio(T1 a, T2 delta); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_delta_ratio(T1 a, T2 delta); template - ``__sf_result`` tgamma_delta_ratio(T1 a, T2 delta, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_delta_ratio(T1 a, T2 delta, const ``__Policy``&); }} // namespaces [h4 Description] template - ``__sf_result`` tgamma_ratio(T1 a, T2 b); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_ratio(T1 a, T2 b); template - ``__sf_result`` tgamma_ratio(T1 a, T2 b, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_ratio(T1 a, T2 b, const ``__Policy``&); Returns the ratio of gamma functions: @@ -37,10 +37,10 @@ Returns the ratio of gamma functions: Internally this just calls `tgamma_delta_ratio(a, b-a)`. 
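[Note, not part of the patch: a quick numerical illustration of the two ratio functions and the relation between them. Γ(5)/Γ(3) = 24/2 = 12:]

    #include <boost/math/special_functions/gamma.hpp>
    #include <iostream>

    int main()
    {
        std::cout << boost::math::tgamma_ratio(5.0, 3.0) << '\n';        // Γ(5)/Γ(3) = 12
        std::cout << boost::math::tgamma_delta_ratio(5.0, -2.0) << '\n'; // Γ(5)/Γ(5 + (-2)) = 12
    }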
template - ``__sf_result`` tgamma_delta_ratio(T1 a, T2 delta); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_delta_ratio(T1 a, T2 delta); template - ``__sf_result`` tgamma_delta_ratio(T1 a, T2 delta, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_delta_ratio(T1 a, T2 delta, const ``__Policy``&); Returns the ratio of gamma functions: diff --git a/doc/sf/gegenbauer.qbk b/doc/sf/gegenbauer.qbk index a6afc53d82..69671917c8 100644 --- a/doc/sf/gegenbauer.qbk +++ b/doc/sf/gegenbauer.qbk @@ -16,13 +16,13 @@ namespace boost{ namespace math{ template - Real gegenbauer(unsigned n, Real lambda, Real x); + BOOST_MATH_GPU_ENABLED Real gegenbauer(unsigned n, Real lambda, Real x); template - Real gegenbauer_prime(unsigned n, Real lambda, Real x); + BOOST_MATH_GPU_ENABLED Real gegenbauer_prime(unsigned n, Real lambda, Real x); template - Real gegenbauer_derivative(unsigned n, Real lambda, Real x, unsigned k); + BOOST_MATH_GPU_ENABLED Real gegenbauer_derivative(unsigned n, Real lambda, Real x, unsigned k); }} // namespaces diff --git a/doc/sf/hankel.qbk b/doc/sf/hankel.qbk index 4d8a5eda1e..05d65201b1 100644 --- a/doc/sf/hankel.qbk +++ b/doc/sf/hankel.qbk @@ -3,18 +3,36 @@ [h4 Synopsis] + #if !defined(__CUDACC__) && !defined(__CUDACC_RTC__) + template - std::complex<``__sf_result``> cyl_hankel_1(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED std::complex<``__sf_result``> cyl_hankel_1(T1 v, T2 x); template - std::complex<``__sf_result``> cyl_hankel_1(T1 v, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED std::complex<``__sf_result``> cyl_hankel_1(T1 v, T2 x, const ``__Policy``&); template - std::complex<``__sf_result``> cyl_hankel_2(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED std::complex<``__sf_result``> cyl_hankel_2(T1 v, T2 x); template - std::complex<``__sf_result``> cyl_hankel_2(T1 v, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED std::complex<``__sf_result``> cyl_hankel_2(T1 v, T2 x, const ``__Policy``&); + #else // When using cuda we use namespace cuda::std:: instead of std:: + + template + BOOST_MATH_GPU_ENABLED cuda::std::complex<``__sf_result``> cyl_hankel_1(T1 v, T2 x); + + template + BOOST_MATH_GPU_ENABLED cuda::std::complex<``__sf_result``> cyl_hankel_1(T1 v, T2 x, const ``__Policy``&); + + template + BOOST_MATH_GPU_ENABLED cuda::std::complex<``__sf_result``> cyl_hankel_2(T1 v, T2 x); + + template + BOOST_MATH_GPU_ENABLED cuda::std::complex<``__sf_result``> cyl_hankel_2(T1 v, T2 x, const ``__Policy``&); + + #endif + [h4 Description] @@ -77,18 +95,35 @@ routines for integer order are used. 
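[Note, not part of the patch: a minimal sketch of what the `cuda::std::complex` return type above means in practice. Under NVCC/NVRTC the result of `cyl_hankel_1` is a `cuda::std::complex`, so a kernel would typically split it into real and imaginary parts; the kernel name and buffers here are hypothetical:]

    #include <boost/math/special_functions/hankel.hpp>
    #include <cuda/std/complex>

    __global__ void hankel_kernel(const double* in, double* out_re, double* out_im, int n)
    {
        const int i = blockDim.x * blockIdx.x + threadIdx.x;
        if (i < n)
        {
            auto h = boost::math::cyl_hankel_1(1.0, in[i]); // cuda::std::complex<double> on device
            out_re[i] = h.real();
            out_im[i] = h.imag();
        }
    }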
[h4 Synopsis] + #if !defined(__CUDACC__) && !defined(__CUDACC_RTC__) + template - std::complex<``__sf_result``> sph_hankel_1(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED std::complex<``__sf_result``> sph_hankel_1(T1 v, T2 x); template - std::complex<``__sf_result``> sph_hankel_1(T1 v, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED std::complex<``__sf_result``> sph_hankel_1(T1 v, T2 x, const ``__Policy``&); template - std::complex<``__sf_result``> sph_hankel_2(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED std::complex<``__sf_result``> sph_hankel_2(T1 v, T2 x); + + template + BOOST_MATH_GPU_ENABLED std::complex<``__sf_result``> sph_hankel_2(T1 v, T2 x, const ``__Policy``&); + #else // When using cuda we use namespace cuda::std:: instead of std:: + + template + BOOST_MATH_GPU_ENABLED cuda::std::complex<``__sf_result``> sph_hankel_1(T1 v, T2 x); + template - std::complex<``__sf_result``> sph_hankel_2(T1 v, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED cuda::std::complex<``__sf_result``> sph_hankel_1(T1 v, T2 x, const ``__Policy``&); + + template + BOOST_MATH_GPU_ENABLED cuda::std::complex<``__sf_result``> sph_hankel_2(T1 v, T2 x); + template + BOOST_MATH_GPU_ENABLED cuda::std::complex<``__sf_result``> sph_hankel_2(T1 v, T2 x, const ``__Policy``&); + + #endif [h4 Description] diff --git a/doc/sf/hermite.qbk b/doc/sf/hermite.qbk index c88aadc344..965aa80928 100644 --- a/doc/sf/hermite.qbk +++ b/doc/sf/hermite.qbk @@ -9,13 +9,13 @@ namespace boost{ namespace math{ template - ``__sf_result`` hermite(unsigned n, T x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` hermite(unsigned n, T x); template - ``__sf_result`` hermite(unsigned n, T x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` hermite(unsigned n, T x, const ``__Policy``&); template - ``__sf_result`` hermite_next(unsigned n, T1 x, T2 Hn, T3 Hnm1); + BOOST_MATH_GPU_ENABLED ``__sf_result`` hermite_next(unsigned n, T1 x, T2 Hn, T3 Hnm1); }} // namespaces @@ -26,10 +26,10 @@ note than when there is a single template argument the result is the same type as that argument or `double` if the template argument is an integer type. 
template - ``__sf_result`` hermite(unsigned n, T x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` hermite(unsigned n, T x); template - ``__sf_result`` hermite(unsigned n, T x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` hermite(unsigned n, T x, const ``__Policy``&); Returns the value of the Hermite Polynomial of order /n/ at point /x/: @@ -43,7 +43,7 @@ Hermite Polynomials: [graph hermite] template - ``__sf_result`` hermite_next(unsigned n, T1 x, T2 Hn, T3 Hnm1); + BOOST_MATH_GPU_ENABLED ``__sf_result`` hermite_next(unsigned n, T1 x, T2 Hn, T3 Hnm1); Implements the three term recurrence relation for the Hermite polynomials, this function can be used to create a sequence of diff --git a/doc/sf/ibeta.qbk b/doc/sf/ibeta.qbk index b4a20f9286..5227b2d342 100644 --- a/doc/sf/ibeta.qbk +++ b/doc/sf/ibeta.qbk @@ -9,28 +9,28 @@ namespace boost{ namespace math{ template - ``__sf_result`` ibeta(T1 a, T2 b, T3 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta(T1 a, T2 b, T3 x); template - ``__sf_result`` ibeta(T1 a, T2 b, T3 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta(T1 a, T2 b, T3 x, const ``__Policy``&); template - ``__sf_result`` ibetac(T1 a, T2 b, T3 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac(T1 a, T2 b, T3 x); template - ``__sf_result`` ibetac(T1 a, T2 b, T3 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac(T1 a, T2 b, T3 x, const ``__Policy``&); template - ``__sf_result`` beta(T1 a, T2 b, T3 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` beta(T1 a, T2 b, T3 x); template - ``__sf_result`` beta(T1 a, T2 b, T3 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` beta(T1 a, T2 b, T3 x, const ``__Policy``&); template - ``__sf_result`` betac(T1 a, T2 b, T3 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` betac(T1 a, T2 b, T3 x); template - ``__sf_result`` betac(T1 a, T2 b, T3 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` betac(T1 a, T2 b, T3 x, const ``__Policy``&); }} // namespaces @@ -57,10 +57,10 @@ when T1, T2 and T3 are different types. 
[optional_policy] template - ``__sf_result`` ibeta(T1 a, T2 b, T3 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta(T1 a, T2 b, T3 x); template - ``__sf_result`` ibeta(T1 a, T2 b, T3 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta(T1 a, T2 b, T3 x, const ``__Policy``&); Returns the normalised incomplete beta function of a, b and x: @@ -69,30 +69,30 @@ Returns the normalised incomplete beta function of a, b and x: [graph ibeta] template - ``__sf_result`` ibetac(T1 a, T2 b, T3 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac(T1 a, T2 b, T3 x); template - ``__sf_result`` ibetac(T1 a, T2 b, T3 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac(T1 a, T2 b, T3 x, const ``__Policy``&); Returns the normalised complement of the incomplete beta function of a, b and x: [equation ibeta4] template - ``__sf_result`` beta(T1 a, T2 b, T3 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` beta(T1 a, T2 b, T3 x); template - ``__sf_result`` beta(T1 a, T2 b, T3 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` beta(T1 a, T2 b, T3 x, const ``__Policy``&); Returns the full (non-normalised) incomplete beta function of a, b and x: [equation ibeta1] template - ``__sf_result`` betac(T1 a, T2 b, T3 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` betac(T1 a, T2 b, T3 x); template - ``__sf_result`` betac(T1 a, T2 b, T3 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` betac(T1 a, T2 b, T3 x, const ``__Policy``&); Returns the full (non-normalised) complement of the incomplete beta function of a, b and x: diff --git a/doc/sf/ibeta_inv.qbk b/doc/sf/ibeta_inv.qbk index 83c2b00086..60049db465 100644 --- a/doc/sf/ibeta_inv.qbk +++ b/doc/sf/ibeta_inv.qbk @@ -7,52 +7,52 @@ namespace boost{ namespace math{ template - ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p); template - ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, const ``__Policy``&); template - ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, T4* py); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, T4* py); template - ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, T4* py, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, T4* py, const ``__Policy``&); template - ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q); template - ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, const ``__Policy``&); template - ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, T4* py); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, T4* py); template - ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, T4* py, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, T4* py, const ``__Policy``&); template - ``__sf_result`` ibeta_inva(T1 b, T2 x, T3 p); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_inva(T1 b, T2 x, T3 p); template - ``__sf_result`` ibeta_inva(T1 b, T2 x, T3 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_inva(T1 b, T2 x, T3 p, const ``__Policy``&); template - ``__sf_result`` ibetac_inva(T1 b, T2 x, T3 q); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_inva(T1 b, T2 x, T3 q); template - ``__sf_result`` ibetac_inva(T1 b, T2 x, T3 q, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED 
``__sf_result`` ibetac_inva(T1 b, T2 x, T3 q, const ``__Policy``&); template - ``__sf_result`` ibeta_invb(T1 a, T2 x, T3 p); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_invb(T1 a, T2 x, T3 p); template - ``__sf_result`` ibeta_invb(T1 a, T2 x, T3 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_invb(T1 a, T2 x, T3 p, const ``__Policy``&); template - ``__sf_result`` ibetac_invb(T1 a, T2 x, T3 q); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_invb(T1 a, T2 x, T3 q); template - ``__sf_result`` ibetac_invb(T1 a, T2 x, T3 q, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_invb(T1 a, T2 x, T3 q, const ``__Policy``&); }} // namespaces @@ -81,16 +81,16 @@ The return type of these functions is computed using the __arg_promotion_rules when called with arguments T1...TN of different types. template - ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p); template - ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, const ``__Policy``&); template - ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, T4* py); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, T4* py); template - ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, T4* py, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, T4* py, const ``__Policy``&); Returns a value /x/ such that: `p = ibeta(a, b, x);` and sets `*py = 1 - x` when the `py` parameter is provided and is non-null. @@ -104,16 +104,16 @@ Requires: /a,b > 0/ and /0 <= p <= 1/. [optional_policy] template - ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q); + BOOST_MATH_GPU_ENABLED``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q); template - ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, const ``__Policy``&); template - ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, T4* py); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, T4* py); template - ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, T4* py, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, T4* py, const ``__Policy``&); Returns a value /x/ such that: `q = ibetac(a, b, x);` and sets `*py = 1 - x` when the `py` parameter is provided and is non-null. @@ -127,10 +127,10 @@ Requires: /a,b > 0/ and /0 <= q <= 1/. [optional_policy] template - ``__sf_result`` ibeta_inva(T1 b, T2 x, T3 p); + BOOST_MATH_GPU_ENABLED``__sf_result`` ibeta_inva(T1 b, T2 x, T3 p); template - ``__sf_result`` ibeta_inva(T1 b, T2 x, T3 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED``__sf_result`` ibeta_inva(T1 b, T2 x, T3 p, const ``__Policy``&); Returns a value /a/ such that: `p = ibeta(a, b, x);` @@ -139,10 +139,10 @@ Requires: /b > 0/, /0 < x < 1/ and /0 <= p <= 1/. [optional_policy] template - ``__sf_result`` ibetac_inva(T1 b, T2 x, T3 p); + BOOST_MATH_GPU_ENABLED``__sf_result`` ibetac_inva(T1 b, T2 x, T3 p); template - ``__sf_result`` ibetac_inva(T1 b, T2 x, T3 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED``__sf_result`` ibetac_inva(T1 b, T2 x, T3 p, const ``__Policy``&); Returns a value /a/ such that: `q = ibetac(a, b, x);` @@ -151,10 +151,10 @@ Requires: /b > 0/, /0 < x < 1/ and /0 <= q <= 1/. 
[optional_policy] template - ``__sf_result`` ibeta_invb(T1 b, T2 x, T3 p); + BOOST_MATH_GPU_ENABLED``__sf_result`` ibeta_invb(T1 b, T2 x, T3 p); template - ``__sf_result`` ibeta_invb(T1 b, T2 x, T3 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED``__sf_result`` ibeta_invb(T1 b, T2 x, T3 p, const ``__Policy``&); Returns a value /b/ such that: `p = ibeta(a, b, x);` @@ -163,10 +163,10 @@ Requires: /a > 0/, /0 < x < 1/ and /0 <= p <= 1/. [optional_policy] template - ``__sf_result`` ibetac_invb(T1 b, T2 x, T3 p); + BOOST_MATH_GPU_ENABLED``__sf_result`` ibetac_invb(T1 b, T2 x, T3 p); template - ``__sf_result`` ibetac_invb(T1 b, T2 x, T3 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_invb(T1 b, T2 x, T3 p, const ``__Policy``&); Returns a value /b/ such that: `q = ibetac(a, b, x);` diff --git a/doc/sf/igamma.qbk b/doc/sf/igamma.qbk index ca354ad10f..4675928e63 100644 --- a/doc/sf/igamma.qbk +++ b/doc/sf/igamma.qbk @@ -9,28 +9,28 @@ namespace boost{ namespace math{ template - ``__sf_result`` gamma_p(T1 a, T2 z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p(T1 a, T2 z); template - ``__sf_result`` gamma_p(T1 a, T2 z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p(T1 a, T2 z, const ``__Policy``&); template - ``__sf_result`` gamma_q(T1 a, T2 z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q(T1 a, T2 z); template - ``__sf_result`` gamma_q(T1 a, T2 z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q(T1 a, T2 z, const ``__Policy``&); template - ``__sf_result`` tgamma_lower(T1 a, T2 z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_lower(T1 a, T2 z); template - ``__sf_result`` tgamma_lower(T1 a, T2 z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_lower(T1 a, T2 z, const ``__Policy``&); template - ``__sf_result`` tgamma(T1 a, T2 z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma(T1 a, T2 z); template - ``__sf_result`` tgamma(T1 a, T2 z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma(T1 a, T2 z, const ``__Policy``&); }} // namespaces @@ -53,10 +53,10 @@ The return type of these functions is computed using the __arg_promotion_rules when T1 and T2 are different types, otherwise the return type is simply T1. 
template - ``__sf_result`` gamma_p(T1 a, T2 z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p(T1 a, T2 z); template - ``__sf_result`` gamma_p(T1 a, T2 z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p(T1 a, T2 z, const ``__Policy``&); Returns the normalised lower incomplete gamma function of a and z: @@ -67,10 +67,10 @@ This function changes rapidly from 0 to 1 around the point z == a: [graph gamma_p] template - ``__sf_result`` gamma_q(T1 a, T2 z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q(T1 a, T2 z); template - ``__sf_result`` gamma_q(T1 a, T2 z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q(T1 a, T2 z, const ``__Policy``&); Returns the normalised upper incomplete gamma function of a and z: @@ -81,20 +81,20 @@ This function changes rapidly from 1 to 0 around the point z == a: [graph gamma_q] template - ``__sf_result`` tgamma_lower(T1 a, T2 z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_lower(T1 a, T2 z); template - ``__sf_result`` tgamma_lower(T1 a, T2 z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_lower(T1 a, T2 z, const ``__Policy``&); Returns the full (non-normalised) lower incomplete gamma function of a and z: [equation igamma2] template - ``__sf_result`` tgamma(T1 a, T2 z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma(T1 a, T2 z); template - ``__sf_result`` tgamma(T1 a, T2 z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma(T1 a, T2 z, const ``__Policy``&); Returns the full (non-normalised) upper incomplete gamma function of a and z: diff --git a/doc/sf/igamma_inv.qbk b/doc/sf/igamma_inv.qbk index 593c92141b..55fe76e6e8 100644 --- a/doc/sf/igamma_inv.qbk +++ b/doc/sf/igamma_inv.qbk @@ -9,28 +9,28 @@ namespace boost{ namespace math{ template - ``__sf_result`` gamma_q_inv(T1 a, T2 q); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q_inv(T1 a, T2 q); template - ``__sf_result`` gamma_q_inv(T1 a, T2 q, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q_inv(T1 a, T2 q, const ``__Policy``&); template - ``__sf_result`` gamma_p_inv(T1 a, T2 p); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p_inv(T1 a, T2 p); template - ``__sf_result`` gamma_p_inv(T1 a, T2 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p_inv(T1 a, T2 p, const ``__Policy``&); template - ``__sf_result`` gamma_q_inva(T1 x, T2 q); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q_inva(T1 x, T2 q); template - ``__sf_result`` gamma_q_inva(T1 x, T2 q, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q_inva(T1 x, T2 q, const ``__Policy``&); template - ``__sf_result`` gamma_p_inva(T1 x, T2 p); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p_inva(T1 x, T2 p); template - ``__sf_result`` gamma_p_inva(T1 x, T2 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p_inva(T1 x, T2 p, const ``__Policy``&); }} // namespaces @@ -58,40 +58,40 @@ These are implemented here as `gamma_p_inva` and `gamma_q_inva`.] template - ``__sf_result`` gamma_q_inv(T1 a, T2 q); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q_inv(T1 a, T2 q); template - ``__sf_result`` gamma_q_inv(T1 a, T2 q, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q_inv(T1 a, T2 q, const ``__Policy``&); Returns a value x such that: `q = gamma_q(a, x);` Requires: /a > 0/ and /1 >= p,q >= 0/. 
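[Note, not part of the patch: the inverses round-trip with their forward functions; a minimal host-side check:]

    #include <boost/math/special_functions/gamma.hpp>
    #include <iostream>

    int main()
    {
        const double a = 2.5;
        const double q = 0.25;
        const double x = boost::math::gamma_q_inv(a, q);     // x such that gamma_q(a, x) == q
        std::cout << boost::math::gamma_q(a, x) - q << '\n'; // ~0, up to rounding
    }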
template - ``__sf_result`` gamma_p_inv(T1 a, T2 p); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p_inv(T1 a, T2 p); template - ``__sf_result`` gamma_p_inv(T1 a, T2 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p_inv(T1 a, T2 p, const ``__Policy``&); Returns a value x such that: `p = gamma_p(a, x);` Requires: /a > 0/ and /1 >= p,q >= 0/. template - ``__sf_result`` gamma_q_inva(T1 x, T2 q); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q_inva(T1 x, T2 q); template - ``__sf_result`` gamma_q_inva(T1 x, T2 q, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q_inva(T1 x, T2 q, const ``__Policy``&); Returns a value a such that: `q = gamma_q(a, x);` Requires: /x > 0/ and /1 >= p,q >= 0/. template - ``__sf_result`` gamma_p_inva(T1 x, T2 p); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p_inva(T1 x, T2 p); template - ``__sf_result`` gamma_p_inva(T1 x, T2 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p_inva(T1 x, T2 p, const ``__Policy``&); Returns a value a such that: `p = gamma_p(a, x);` diff --git a/doc/sf/lgamma.qbk b/doc/sf/lgamma.qbk index 5ea1a4e091..544485c7ca 100644 --- a/doc/sf/lgamma.qbk +++ b/doc/sf/lgamma.qbk @@ -9,16 +9,16 @@ namespace boost{ namespace math{ template - ``__sf_result`` lgamma(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma(T z); template - ``__sf_result`` lgamma(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma(T z, const ``__Policy``&); template - ``__sf_result`` lgamma(T z, int* sign); + BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma(T z, int* sign); template - ``__sf_result`` lgamma(T z, int* sign, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma(T z, int* sign, const ``__Policy``&); }} // namespaces diff --git a/doc/sf/pow.qbk b/doc/sf/pow.qbk index db021978e2..ecb762d711 100644 --- a/doc/sf/pow.qbk +++ b/doc/sf/pow.qbk @@ -10,10 +10,10 @@ power of a run-time base. namespace boost { namespace math { template - constexpr ``__sf_result`` pow(T base); + BOOST_MATH_GPU_ENABLED constexpr ``__sf_result`` pow(T base); template - constexpr ``__sf_result`` pow(T base, const Policy& policy); + BOOST_MATH_GPU_ENABLED constexpr ``__sf_result`` pow(T base, const Policy& policy); }} diff --git a/doc/sf/sinc.qbk b/doc/sf/sinc.qbk index b345c08cd7..a6042a7171 100644 --- a/doc/sf/sinc.qbk +++ b/doc/sf/sinc.qbk @@ -43,16 +43,16 @@ and [@http://mathworld.wolfram.com/Octonion.html octonions]. `` template - ``__sf_result`` sinc_pi(const T x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` sinc_pi(const T x); template - ``__sf_result`` sinc_pi(const T x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` sinc_pi(const T x, const ``__Policy``&); template class U> - U sinc_pi(const U x); + BOOST_MATH_GPU_ENABLED U sinc_pi(const U x); template class U, class ``__Policy``> - U sinc_pi(const U x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED U sinc_pi(const U x, const ``__Policy``&); Computes [link math_toolkit.sinc.sinc_overview @@ -78,10 +78,10 @@ to ensure accuracy. 
`` template - ``__sf_result`` sinhc_pi(const T x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` sinhc_pi(const T x); template - ``__sf_result`` sinhc_pi(const T x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` sinhc_pi(const T x, const ``__Policy``&); template class U> U sinhc_pi(const U x); diff --git a/doc/sf/tgamma.qbk b/doc/sf/tgamma.qbk index 7eb535ec3a..23baad2cb8 100644 --- a/doc/sf/tgamma.qbk +++ b/doc/sf/tgamma.qbk @@ -9,26 +9,26 @@ namespace boost{ namespace math{ template - ``__sf_result`` tgamma(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma(T z); template - ``__sf_result`` tgamma(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma(T z, const ``__Policy``&); template - ``__sf_result`` tgamma1pm1(T dz); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma1pm1(T dz); template - ``__sf_result`` tgamma1pm1(T dz, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma1pm1(T dz, const ``__Policy``&); }} // namespaces [h4 Description] template - ``__sf_result`` tgamma(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma(T z); template - ``__sf_result`` tgamma(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma(T z, const ``__Policy``&); Returns the "true gamma" (hence name tgamma) of value z: @@ -42,10 +42,10 @@ The return type of this function is computed using the __arg_promotion_rules: the result is `double` when T is an integer type, and T otherwise. template - ``__sf_result`` tgamma1pm1(T dz); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma1pm1(T dz); template - ``__sf_result`` tgamma1pm1(T dz, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma1pm1(T dz, const ``__Policy``&); Returns `tgamma(dz + 1) - 1`. Internally the implementation does not make use of the addition and subtraction implied by the definition, leading to diff --git a/doc/sf/trigamma.qbk b/doc/sf/trigamma.qbk index 137a148d83..a358c85713 100644 --- a/doc/sf/trigamma.qbk +++ b/doc/sf/trigamma.qbk @@ -9,10 +9,10 @@ namespace boost{ namespace math{ template - ``__sf_result`` trigamma(T x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` trigamma(T x); template - ``__sf_result`` trigamma(T x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` trigamma(T x, const ``__Policy``&); }} // namespaces diff --git a/example/Jamfile.v2 b/example/Jamfile.v2 index 34e4a5a8c8..2a6ad6947d 100644 --- a/example/Jamfile.v2 +++ b/example/Jamfile.v2 @@ -7,10 +7,13 @@ # bring in the rules for testing import testing ; -import ../../config/checks/config : requires ; +import-search /boost/config/checks ; +import config : requires ; project : requirements + /boost/math//boost_math + /boost/multiprecision//boost_multiprecision gcc:-Wno-missing-braces darwin:-Wno-missing-braces acc:+W2068,2461,2236,4070 @@ -36,7 +39,6 @@ project clang:-Wno-unknown-pragmas clang:-Wno-language-extension-token - ../../.. 
      <include>../include_private
      <exception-handling>off:<source>../test//no_eh
      [ requires cxx11_noexcept cxx11_rvalue_references sfinae_expr cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_hdr_tuple cxx11_hdr_initializer_list cxx11_hdr_chrono cxx11_thread_local cxx11_constexpr cxx11_nullptr cxx11_numeric_limits cxx11_decltype cxx11_hdr_array cxx11_hdr_atomic cxx11_hdr_type_traits cxx11_allocator cxx11_explicit_conversion_operators ]
@@ -53,7 +55,7 @@ test-suite examples :
    [ run binomial_example_nag.cpp ]
    [ run binomial_quiz_example.cpp : : : <exception-handling>off:<build>no ]
    [ run binomial_sample_sizes.cpp ]
-   [ run brent_minimise_example.cpp : : : [ requires cxx11_hdr_tuple ] ]
+   [ run brent_minimise_example.cpp /boost/test//included : : : [ requires cxx11_hdr_tuple ] ]
 
    [ run c_error_policy_example.cpp ]
    [ run chi_square_std_dev_test.cpp : : : <exception-handling>off:<build>no ]
 
@@ -93,11 +95,11 @@ test-suite examples :
    [ run nonfinite_num_facet.cpp ]
    [ run nonfinite_facet_simple.cpp ]
-   #[ run nonfinite_num_facet_serialization.cpp ../../serialization/build//boost_serialization : : : <exception-handling>off:<build>no <toolset>gcc-mingw:<link>static ]
+   #[ run nonfinite_num_facet_serialization.cpp /boost/serialization//boost_serialization : : : <exception-handling>off:<build>no <toolset>gcc-mingw:<link>static ]
    #[ # run lexical_cast_native.cpp ]  # Expected to fail on some (but not all) platforms.
    [ run lexical_cast_nonfinite_facets.cpp ]
    [ run nonfinite_loopback_ok.cpp ]
-   #[ run nonfinite_serialization_archives.cpp ../../serialization/build//boost_serialization : : : <exception-handling>off:<build>no <toolset>gcc-mingw:<link>static ]
+   #[ run nonfinite_serialization_archives.cpp /boost/serialization//boost_serialization : : : <exception-handling>off:<build>no <toolset>gcc-mingw:<link>static ]
    [ run nonfinite_facet_sstream.cpp ]
 
    [ run constants_eg1.cpp ]
@@ -113,7 +115,7 @@ test-suite examples :
    [ run policy_eg_6.cpp ]
    [ run policy_eg_7.cpp ]
    [ run policy_eg_8.cpp ]
-   [ run policy_eg_9.cpp ]
+   [ run policy_eg_9.cpp /boost/format//boost_format ]
    [ run policy_ref_snip1.cpp : : : <exception-handling>off:<build>no ]
    [ run policy_ref_snip10.cpp ]
    [ run policy_ref_snip11.cpp ]
diff --git a/include/boost/math/ccmath/copysign.hpp b/include/boost/math/ccmath/copysign.hpp
index 90a58102b1..e117e57faa 100644
--- a/include/boost/math/ccmath/copysign.hpp
+++ b/include/boost/math/ccmath/copysign.hpp
@@ -54,7 +54,7 @@ constexpr auto copysign(T1 mag, T2 sgn) noexcept
 {
     if (BOOST_MATH_IS_CONSTANT_EVALUATED(mag))
     {
-        using promoted_type = boost::math::tools::promote_args_2_t<T1, T2>;
+        using promoted_type = boost::math::tools::promote_args_t<T1, T2>;
         return boost::math::ccmath::copysign(static_cast<promoted_type>(mag), static_cast<promoted_type>(sgn));
     }
     else
diff --git a/include/boost/math/ccmath/fdim.hpp b/include/boost/math/ccmath/fdim.hpp
index cdcbc223c6..d6b4e25cec 100644
--- a/include/boost/math/ccmath/fdim.hpp
+++ b/include/boost/math/ccmath/fdim.hpp
@@ -66,7 +66,7 @@ constexpr auto fdim(T1 x, T2 y) noexcept
 {
     if (BOOST_MATH_IS_CONSTANT_EVALUATED(x))
     {
-        using promoted_type = boost::math::tools::promote_args_2_t<T1, T2>;
+        using promoted_type = boost::math::tools::promote_args_t<T1, T2>;
         return boost::math::ccmath::fdim(promoted_type(x), promoted_type(y));
     }
     else
diff --git a/include/boost/math/ccmath/fmax.hpp b/include/boost/math/ccmath/fmax.hpp
index 237355275b..8a0d17d03e 100644
--- a/include/boost/math/ccmath/fmax.hpp
+++ b/include/boost/math/ccmath/fmax.hpp
@@ -62,7 +62,7 @@ constexpr auto fmax(T1 x, T2 y) noexcept
 {
     if (BOOST_MATH_IS_CONSTANT_EVALUATED(x))
     {
-        using promoted_type = boost::math::tools::promote_args_2_t<T1, T2>;
+        using promoted_type = boost::math::tools::promote_args_t<T1, T2>;
         return boost::math::ccmath::fmax(static_cast<promoted_type>(x), static_cast<promoted_type>(y));
     }
     else
diff --git a/include/boost/math/ccmath/fmin.hpp b/include/boost/math/ccmath/fmin.hpp
index 1c113e0d6e..29885b69c8 100644
--- a/include/boost/math/ccmath/fmin.hpp
+++ b/include/boost/math/ccmath/fmin.hpp
@@ -62,7 +62,7 @@ constexpr auto fmin(T1 x, T2 y) noexcept
 {
     if (BOOST_MATH_IS_CONSTANT_EVALUATED(x))
     {
-        using promoted_type = boost::math::tools::promote_args_2_t<T1, T2>;
+        using promoted_type = boost::math::tools::promote_args_t<T1, T2>;
         return boost::math::ccmath::fmin(static_cast<promoted_type>(x), static_cast<promoted_type>(y));
     }
     else
diff --git a/include/boost/math/ccmath/hypot.hpp b/include/boost/math/ccmath/hypot.hpp
index 4e0e245b4e..34dd5ab2c0 100644
--- a/include/boost/math/ccmath/hypot.hpp
+++ b/include/boost/math/ccmath/hypot.hpp
@@ -89,7 +89,7 @@ constexpr auto hypot(T1 x, T2 y) noexcept
 {
     if(BOOST_MATH_IS_CONSTANT_EVALUATED(x))
     {
-        using promoted_type = boost::math::tools::promote_args_2_t<T1, T2>;
+        using promoted_type = boost::math::tools::promote_args_t<T1, T2>;
         return boost::math::ccmath::hypot(static_cast<promoted_type>(x), static_cast<promoted_type>(y));
     }
     else
diff --git a/include/boost/math/ccmath/isinf.hpp b/include/boost/math/ccmath/isinf.hpp
index f1e00e34f5..ecf0d620ab 100644
--- a/include/boost/math/ccmath/isinf.hpp
+++ b/include/boost/math/ccmath/isinf.hpp
@@ -22,7 +22,14 @@ constexpr bool isinf BOOST_MATH_PREVENT_MACRO_SUBSTITUTION(T x) noexcept
 {
     if constexpr (std::numeric_limits<T>::is_signed)
     {
+#if defined(__clang_major__) && __clang_major__ >= 6
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wtautological-constant-compare"
+#endif
         return x == std::numeric_limits<T>::infinity() || -x == std::numeric_limits<T>::infinity();
+#if defined(__clang_major__) && __clang_major__ >= 6
+#pragma clang diagnostic pop
+#endif
     }
     else
     {
@@ -32,7 +39,7 @@ constexpr bool isinf BOOST_MATH_PREVENT_MACRO_SUBSTITUTION(T x) noexcept
     else
     {
         using boost::math::isinf;
-        
+
         if constexpr (!std::is_integral_v<T>)
         {
             return (isinf)(x);
diff --git a/include/boost/math/concepts/std_real_concept.hpp b/include/boost/math/concepts/std_real_concept.hpp
index f77935c7fb..43f562efe1 100644
--- a/include/boost/math/concepts/std_real_concept.hpp
+++ b/include/boost/math/concepts/std_real_concept.hpp
@@ -229,19 +229,22 @@ inline boost::math::concepts::std_real_concept (nextafter)(boost::math::concepts
 { return (boost::math::nextafter)(a, b); }
 //
 // C++11 ism's
-// Note that these must not actually call the std:: versions as that precludes using this
-// header to test in C++03 mode, call the Boost versions instead:
+// Now that we only support C++11 and later, we can allow use of these:
 //
 inline boost::math::concepts::std_real_concept asinh(boost::math::concepts::std_real_concept a)
-{ return boost::math::asinh(a.value(), boost::math::policies::make_policy(boost::math::policies::overflow_error<boost::math::policies::ignore_error>())); }
+{ return std::asinh(a.value()); }
 inline boost::math::concepts::std_real_concept acosh(boost::math::concepts::std_real_concept a)
-{ return boost::math::acosh(a.value(), boost::math::policies::make_policy(boost::math::policies::overflow_error<boost::math::policies::ignore_error>())); }
+{ return std::acosh(a.value()); }
 inline boost::math::concepts::std_real_concept atanh(boost::math::concepts::std_real_concept a)
-{ return boost::math::atanh(a.value(), boost::math::policies::make_policy(boost::math::policies::overflow_error<boost::math::policies::ignore_error>())); }
+{ return std::atanh(a.value()); }
 inline bool (isfinite)(boost::math::concepts::std_real_concept a)
 { return (boost::math::isfinite)(a.value()); }
+inline boost::math::concepts::std_real_concept log2(boost::math::concepts::std_real_concept a)
+{ return std::log2(a.value()); }
+inline int ilogb(boost::math::concepts::std_real_concept
a) +{ return std::ilogb(a.value()); } } // namespace std diff --git a/include/boost/math/constants/constants.hpp b/include/boost/math/constants/constants.hpp index 4bf81c61d1..df702bf899 100644 --- a/include/boost/math/constants/constants.hpp +++ b/include/boost/math/constants/constants.hpp @@ -1,5 +1,6 @@ // Copyright John Maddock 2005-2006, 2011. // Copyright Paul A. Bristow 2006-2011. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -8,6 +9,9 @@ #define BOOST_MATH_CONSTANTS_CONSTANTS_INCLUDED #include + +#ifndef BOOST_MATH_HAS_NVRTC + #include #include #include @@ -209,11 +213,11 @@ namespace boost{ namespace math constant_initializer::get_from_string >::force_instantiate();\ return get_from_string();\ }\ - static inline constexpr T get(const std::integral_constant) noexcept\ + BOOST_MATH_GPU_ENABLED static inline constexpr T get(const std::integral_constant) noexcept\ { return BOOST_MATH_JOIN(x, F); }\ - static inline constexpr T get(const std::integral_constant&) noexcept\ + BOOST_MATH_GPU_ENABLED static inline constexpr T get(const std::integral_constant&) noexcept\ { return x; }\ - static inline constexpr T get(const std::integral_constant&) noexcept\ + BOOST_MATH_GPU_ENABLED static inline constexpr T get(const std::integral_constant&) noexcept\ { return BOOST_MATH_JOIN(x, L); }\ BOOST_MATH_FLOAT128_CONSTANT_OVERLOAD(x) \ template static inline const T& get(const std::integral_constant&)\ @@ -231,9 +235,9 @@ namespace boost{ namespace math \ \ /* The actual forwarding function: */ \ - template inline constexpr typename detail::constant_return::type name(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T) BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(Policy)) BOOST_MATH_NOEXCEPT(T)\ + template BOOST_MATH_GPU_ENABLED inline constexpr typename detail::constant_return::type name(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T) BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(Policy)) BOOST_MATH_NOEXCEPT(T)\ { return detail:: BOOST_MATH_JOIN(constant_, name)::get(typename construction_traits::type()); }\ - template inline constexpr typename detail::constant_return::type name(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T)) BOOST_MATH_NOEXCEPT(T)\ + template BOOST_MATH_GPU_ENABLED inline constexpr typename detail::constant_return::type name(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T)) BOOST_MATH_NOEXCEPT(T)\ { return name >(); }\ \ \ @@ -243,6 +247,16 @@ namespace boost{ namespace math namespace long_double_constants{ static constexpr long double name = BOOST_MATH_JOIN(x, L); }\ namespace constants{ +#else // NVRTC simplified macro definition + +#define BOOST_DEFINE_MATH_CONSTANT(name, value, str_value) template BOOST_MATH_GPU_ENABLED constexpr T name() noexcept { return static_cast(value); } + +namespace boost { +namespace math { +namespace constants { + +#endif + BOOST_DEFINE_MATH_CONSTANT(half, 5.000000000000000000000000000000000000e-01, "5.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e-01") BOOST_DEFINE_MATH_CONSTANT(third, 3.333333333333333333333333333333333333e-01, "3.33333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333e-01") BOOST_DEFINE_MATH_CONSTANT(twothirds, 6.666666666666666666666666666666666666e-01, 
"6.66666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666667e-01") @@ -318,17 +332,15 @@ namespace boost{ namespace math BOOST_DEFINE_MATH_CONSTANT(one_div_pi, 0.3183098861837906715377675267450287240689192, "0.31830988618379067153776752674502872406891929148091289749533468811779359526845307018022760553250617191214568545351") BOOST_DEFINE_MATH_CONSTANT(two_div_root_pi, 1.12837916709551257389615890312154517168810125, "1.12837916709551257389615890312154517168810125865799771368817144342128493688298682897348732040421472688605669581272") -#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900) BOOST_DEFINE_MATH_CONSTANT(first_feigenbaum, 4.66920160910299067185320382046620161725818557747576863274, "4.6692016091029906718532038204662016172581855774757686327456513430041343302113147371386897440239480138171") BOOST_DEFINE_MATH_CONSTANT(plastic, 1.324717957244746025960908854478097340734404056901733364534, "1.32471795724474602596090885447809734073440405690173336453401505030282785124554759405469934798178728032991") BOOST_DEFINE_MATH_CONSTANT(gauss, 0.834626841674073186281429732799046808993993013490347002449, "0.83462684167407318628142973279904680899399301349034700244982737010368199270952641186969116035127532412906785") BOOST_DEFINE_MATH_CONSTANT(dottie, 0.739085133215160641655312087673873404013411758900757464965, "0.739085133215160641655312087673873404013411758900757464965680635773284654883547594599376106931766531849801246") BOOST_DEFINE_MATH_CONSTANT(reciprocal_fibonacci, 3.35988566624317755317201130291892717968890513, "3.35988566624317755317201130291892717968890513373196848649555381532513031899668338361541621645679008729704") BOOST_DEFINE_MATH_CONSTANT(laplace_limit, 0.662743419349181580974742097109252907056233549115022417, "0.66274341934918158097474209710925290705623354911502241752039253499097185308651127724965480259895818168") -#endif template -inline constexpr T tau() { return two_pi(); } +BOOST_MATH_GPU_ENABLED inline constexpr T tau() { return two_pi(); } } // namespace constants } // namespace math @@ -338,7 +350,11 @@ inline constexpr T tau() { return two_pi(); } // We deliberately include this *after* all the declarations above, // that way the calculation routines can call on other constants above: // +// NVRTC will not have a type that needs runtime calculation +// +#ifndef BOOST_MATH_HAS_NVRTC #include +#endif #endif // BOOST_MATH_CONSTANTS_CONSTANTS_INCLUDED diff --git a/include/boost/math/differentiation/autodiff.hpp b/include/boost/math/differentiation/autodiff.hpp index 7a57aa2f92..b8880f24de 100644 --- a/include/boost/math/differentiation/autodiff.hpp +++ b/include/boost/math/differentiation/autodiff.hpp @@ -39,7 +39,7 @@ namespace detail { template struct promote_args_n { - using type = typename tools::promote_args_2::type>::type; + using type = typename tools::promote_args::type>::type; }; template @@ -2002,9 +2002,9 @@ using autodiff_root_type = typename autodiff_fvar_type::root_ty // See boost/math/tools/promotion.hpp template -struct promote_args_2, +struct promote_args, detail::autodiff_fvar_type> { - using type = detail::autodiff_fvar_type::type, + using type = detail::autodiff_fvar_type::type, #ifndef BOOST_MATH_NO_CXX14_CONSTEXPR (std::max)(Order0, Order1)>; #else @@ -2018,13 +2018,13 @@ struct promote_args> { }; template -struct promote_args_2, RealType1> { - using type = detail::autodiff_fvar_type::type, Order0>; +struct promote_args, RealType1> { + using type = detail::autodiff_fvar_type::type, Order0>; }; 
template -struct promote_args_2> { - using type = detail::autodiff_fvar_type::type, Order1>; +struct promote_args> { + using type = detail::autodiff_fvar_type::type, Order1>; }; template diff --git a/include/boost/math/distributions.hpp b/include/boost/math/distributions.hpp index 64da99415e..0834db870a 100644 --- a/include/boost/math/distributions.hpp +++ b/include/boost/math/distributions.hpp @@ -24,15 +24,18 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include +#include #include #include #include @@ -42,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/include/boost/math/distributions/arcsine.hpp b/include/boost/math/distributions/arcsine.hpp index a8fcbbc05f..899bfb1b2b 100644 --- a/include/boost/math/distributions/arcsine.hpp +++ b/include/boost/math/distributions/arcsine.hpp @@ -2,6 +2,7 @@ // Copyright John Maddock 2014. // Copyright Paul A. Bristow 2014. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. @@ -29,13 +30,21 @@ #ifndef BOOST_MATH_DIST_ARCSINE_HPP #define BOOST_MATH_DIST_ARCSINE_HPP -#include -#include +#include +#include #include // complements. #include // error checks. #include - #include // isnan. +#include +#include + +#ifndef BOOST_MATH_HAS_NVRTC +#include +#include +#include +#include // For std::domain_error. +#endif #if defined (BOOST_MSVC) # pragma warning(push) @@ -43,9 +52,6 @@ // in domain_error_imp in error_handling. #endif -#include -#include // For std::domain_error. - namespace boost { namespace math @@ -55,7 +61,7 @@ namespace boost // Common error checking routines for arcsine distribution functions: // Duplicating for x_min and x_max provides specific error messages. template - inline bool check_x_min(const char* function, const RealType& x, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_x_min(const char* function, const RealType& x, RealType* result, const Policy& pol) { if (!(boost::math::isfinite)(x)) { @@ -68,7 +74,7 @@ namespace boost } // bool check_x_min template - inline bool check_x_max(const char* function, const RealType& x, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_x_max(const char* function, const RealType& x, RealType* result, const Policy& pol) { if (!(boost::math::isfinite)(x)) { @@ -82,14 +88,14 @@ namespace boost template - inline bool check_x_minmax(const char* function, const RealType& x_min, const RealType& x_max, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_x_minmax(const char* function, const RealType& x_min, const RealType& x_max, RealType* result, const Policy& pol) { // Check x_min < x_max if (x_min >= x_max) { - std::string msg = "x_max argument is %1%, but must be > x_min"; + constexpr auto msg = "x_max argument is %1%, but must be > x_min"; *result = policies::raise_domain_error( function, - msg.c_str(), x_max, pol); + msg, x_max, pol); // "x_max argument is %1%, but must be > x_min !", x_max, pol); // "x_max argument is %1%, but must be > x_min %2!", x_max, x_min, pol); would be better. 
// But would require replication of all helpers functions in /policies/error_handling.hpp for two values, @@ -100,7 +106,7 @@ namespace boost } // bool check_x_minmax template - inline bool check_prob(const char* function, const RealType& p, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_prob(const char* function, const RealType& p, RealType* result, const Policy& pol) { if ((p < 0) || (p > 1) || !(boost::math::isfinite)(p)) { @@ -113,7 +119,7 @@ namespace boost } // bool check_prob template - inline bool check_x(const char* function, const RealType& x_min, const RealType& x_max, const RealType& x, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_x(const char* function, const RealType& x_min, const RealType& x_max, const RealType& x, RealType* result, const Policy& pol) { // Check x finite and x_min < x < x_max. if (!(boost::math::isfinite)(x)) { @@ -137,7 +143,7 @@ namespace boost } // bool check_x template - inline bool check_dist(const char* function, const RealType& x_min, const RealType& x_max, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* function, const RealType& x_min, const RealType& x_max, RealType* result, const Policy& pol) { // Check both x_min and x_max finite, and x_min < x_max. return check_x_min(function, x_min, result, pol) && check_x_max(function, x_max, result, pol) @@ -145,14 +151,14 @@ namespace boost } // bool check_dist template - inline bool check_dist_and_x(const char* function, const RealType& x_min, const RealType& x_max, RealType x, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_dist_and_x(const char* function, const RealType& x_min, const RealType& x_max, RealType x, RealType* result, const Policy& pol) { return check_dist(function, x_min, x_max, result, pol) && arcsine_detail::check_x(function, x_min, x_max, x, result, pol); } // bool check_dist_and_x template - inline bool check_dist_and_prob(const char* function, const RealType& x_min, const RealType& x_max, RealType p, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_dist_and_prob(const char* function, const RealType& x_min, const RealType& x_max, RealType p, RealType* result, const Policy& pol) { return check_dist(function, x_min, x_max, result, pol) && check_prob(function, p, result, pol); @@ -167,7 +173,7 @@ namespace boost typedef RealType value_type; typedef Policy policy_type; - arcsine_distribution(RealType x_min = 0, RealType x_max = 1) : m_x_min(x_min), m_x_max(x_max) + BOOST_MATH_GPU_ENABLED arcsine_distribution(RealType x_min = 0, RealType x_max = 1) : m_x_min(x_min), m_x_max(x_max) { // Default beta (alpha = beta = 0.5) is standard arcsine with x_min = 0, x_max = 1. // Generalized to allow x_min and x_max to be specified. RealType result; @@ -178,11 +184,11 @@ namespace boost &result, Policy()); } // arcsine_distribution constructor. // Accessor functions: - RealType x_min() const + BOOST_MATH_GPU_ENABLED RealType x_min() const { return m_x_min; } - RealType x_max() const + BOOST_MATH_GPU_ENABLED RealType x_max() const { return m_x_max; } @@ -203,21 +209,21 @@ namespace boost #endif template - inline const std::pair range(const arcsine_distribution& dist) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair range(const arcsine_distribution& dist) { // Range of permissible values for random variable x. 
using boost::math::tools::max_value; - return std::pair(static_cast(dist.x_min()), static_cast(dist.x_max())); + return boost::math::pair(static_cast(dist.x_min()), static_cast(dist.x_max())); } template - inline const std::pair support(const arcsine_distribution& dist) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair support(const arcsine_distribution& dist) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. - return std::pair(static_cast(dist.x_min()), static_cast(dist.x_max())); + return boost::math::pair(static_cast(dist.x_min()), static_cast(dist.x_max())); } template - inline RealType mean(const arcsine_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mean(const arcsine_distribution& dist) { // Mean of arcsine distribution . RealType result; RealType x_min = dist.x_min(); @@ -236,7 +242,7 @@ namespace boost } // mean template - inline RealType variance(const arcsine_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType variance(const arcsine_distribution& dist) { // Variance of standard arcsine distribution = (1-0)/8 = 0.125. RealType result; RealType x_min = dist.x_min(); @@ -254,7 +260,7 @@ namespace boost } // variance template - inline RealType mode(const arcsine_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline RealType mode(const arcsine_distribution& /* dist */) { //There are always [*two] values for the mode, at ['x_min] and at ['x_max], default 0 and 1, // so instead we raise the exception domain_error. return policies::raise_domain_error( @@ -265,7 +271,7 @@ namespace boost } // mode template - inline RealType median(const arcsine_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType median(const arcsine_distribution& dist) { // Median of arcsine distribution (a + b) / 2 == mean. RealType x_min = dist.x_min(); RealType x_max = dist.x_max(); @@ -283,7 +289,7 @@ namespace boost } template - inline RealType skewness(const arcsine_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType skewness(const arcsine_distribution& dist) { RealType result; RealType x_min = dist.x_min(); @@ -302,7 +308,7 @@ namespace boost } // skewness template - inline RealType kurtosis_excess(const arcsine_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const arcsine_distribution& dist) { RealType result; RealType x_min = dist.x_min(); @@ -322,7 +328,7 @@ namespace boost } // kurtosis_excess template - inline RealType kurtosis(const arcsine_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const arcsine_distribution& dist) { RealType result; RealType x_min = dist.x_min(); @@ -342,12 +348,12 @@ namespace boost } // kurtosis template - inline RealType pdf(const arcsine_distribution& dist, const RealType& xx) + BOOST_MATH_GPU_ENABLED inline RealType pdf(const arcsine_distribution& dist, const RealType& xx) { // Probability Density/Mass Function arcsine. BOOST_FPU_EXCEPTION_GUARD BOOST_MATH_STD_USING // For ADL of std functions. - static const char* function = "boost::math::pdf(arcsine_distribution<%1%> const&, %1%)"; + constexpr auto function = "boost::math::pdf(arcsine_distribution<%1%> const&, %1%)"; RealType lo = dist.x_min(); RealType hi = dist.x_max(); @@ -368,11 +374,11 @@ namespace boost } // pdf template - inline RealType cdf(const arcsine_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const arcsine_distribution& dist, const RealType& x) { // Cumulative Distribution Function arcsine. 
BOOST_MATH_STD_USING // For ADL of std functions. - static const char* function = "boost::math::cdf(arcsine_distribution<%1%> const&, %1%)"; + constexpr auto function = "boost::math::cdf(arcsine_distribution<%1%> const&, %1%)"; RealType x_min = dist.x_min(); RealType x_max = dist.x_max(); @@ -401,10 +407,10 @@ namespace boost } // arcsine cdf template - inline RealType cdf(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { // Complemented Cumulative Distribution Function arcsine. BOOST_MATH_STD_USING // For ADL of std functions. - static const char* function = "boost::math::cdf(arcsine_distribution<%1%> const&, %1%)"; + constexpr auto function = "boost::math::cdf(arcsine_distribution<%1%> const&, %1%)"; RealType x = c.param; arcsine_distribution const& dist = c.dist; @@ -437,7 +443,7 @@ namespace boost } // arcsine ccdf template - inline RealType quantile(const arcsine_distribution& dist, const RealType& p) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const arcsine_distribution& dist, const RealType& p) { // Quantile or Percent Point arcsine function or // Inverse Cumulative probability distribution function CDF. @@ -451,7 +457,7 @@ namespace boost using boost::math::constants::half_pi; - static const char* function = "boost::math::quantile(arcsine_distribution<%1%> const&, %1%)"; + constexpr auto function = "boost::math::quantile(arcsine_distribution<%1%> const&, %1%)"; RealType result = 0; // of argument checks: RealType x_min = dist.x_min(); @@ -481,7 +487,7 @@ namespace boost } // quantile template - inline RealType quantile(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { // Complement Quantile or Percent Point arcsine function. // Return the number of expected x for a given @@ -489,7 +495,7 @@ namespace boost BOOST_MATH_STD_USING // For ADL of std functions. using boost::math::constants::half_pi; - static const char* function = "boost::math::quantile(arcsine_distribution<%1%> const&, %1%)"; + constexpr auto function = "boost::math::quantile(arcsine_distribution<%1%> const&, %1%)"; // Error checks: RealType q = c.param; diff --git a/include/boost/math/distributions/bernoulli.hpp b/include/boost/math/distributions/bernoulli.hpp index cce209a6fb..f1c693f7f0 100644 --- a/include/boost/math/distributions/bernoulli.hpp +++ b/include/boost/math/distributions/bernoulli.hpp @@ -2,6 +2,7 @@ // Copyright John Maddock 2006. // Copyright Paul A. Bristow 2007. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. @@ -27,13 +28,19 @@ #ifndef BOOST_MATH_SPECIAL_BERNOULLI_HPP #define BOOST_MATH_SPECIAL_BERNOULLI_HPP -#include #include +#include +#include #include // complements #include // error checks #include // isnan. 
+#include +#include +#ifndef BOOST_MATH_HAS_NVRTC #include +#include +#endif namespace boost { @@ -43,7 +50,7 @@ namespace boost { // Common error checking routines for bernoulli distribution functions: template - inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& /* pol */) + BOOST_MATH_GPU_ENABLED inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& /* pol */) { if(!(boost::math::isfinite)(p) || (p < 0) || (p > 1)) { @@ -55,23 +62,23 @@ namespace boost return true; } template - inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& /* pol */, const std::true_type&) + BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& /* pol */, const boost::math::true_type&) { return check_success_fraction(function, p, result, Policy()); } template - inline bool check_dist(const char* , const RealType& , RealType* , const Policy& /* pol */, const std::false_type&) + BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* , const RealType& , RealType* , const Policy& /* pol */, const boost::math::false_type&) { return true; } template - inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& /* pol */) + BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& /* pol */) { return check_dist(function, p, result, Policy(), typename policies::constructor_error_check::type()); } template - inline bool check_dist_and_k(const char* function, const RealType& p, RealType k, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_dist_and_k(const char* function, const RealType& p, RealType k, RealType* result, const Policy& pol) { if(check_dist(function, p, result, Policy(), typename policies::method_error_check::type()) == false) { @@ -87,7 +94,7 @@ namespace boost return true; } template - inline bool check_dist_and_prob(const char* function, RealType p, RealType prob, RealType* result, const Policy& /* pol */) + BOOST_MATH_GPU_ENABLED inline bool check_dist_and_prob(const char* function, RealType p, RealType prob, RealType* result, const Policy& /* pol */) { if((check_dist(function, p, result, Policy(), typename policies::method_error_check::type()) && detail::check_probability(function, prob, result, Policy())) == false) { @@ -105,7 +112,7 @@ namespace boost typedef RealType value_type; typedef Policy policy_type; - bernoulli_distribution(RealType p = 0.5) : m_p(p) + BOOST_MATH_GPU_ENABLED bernoulli_distribution(RealType p = 0.5) : m_p(p) { // Default probability = half suits 'fair' coin tossing // where probability of heads == probability of tails. RealType result; // of checks. @@ -115,7 +122,7 @@ namespace boost &result, Policy()); } // bernoulli_distribution constructor. - RealType success_fraction() const + BOOST_MATH_GPU_ENABLED RealType success_fraction() const { // Probability. return m_p; } @@ -132,21 +139,21 @@ namespace boost #endif template - inline const std::pair range(const bernoulli_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair range(const bernoulli_distribution& /* dist */) { // Range of permissible values for random variable k = {0, 1}. 
using boost::math::tools::max_value; - return std::pair(static_cast(0), static_cast(1)); + return boost::math::pair(static_cast(0), static_cast(1)); } template - inline const std::pair support(const bernoulli_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair support(const bernoulli_distribution& /* dist */) { // Range of supported values for random variable k = {0, 1}. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. - return std::pair(static_cast(0), static_cast(1)); + return boost::math::pair(static_cast(0), static_cast(1)); } template - inline RealType mean(const bernoulli_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mean(const bernoulli_distribution& dist) { // Mean of bernoulli distribution = p (n = 1). return dist.success_fraction(); } // mean @@ -159,13 +166,13 @@ namespace boost //} // median template - inline RealType variance(const bernoulli_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType variance(const bernoulli_distribution& dist) { // Variance of bernoulli distribution =p * q. return dist.success_fraction() * (1 - dist.success_fraction()); } // variance template - RealType pdf(const bernoulli_distribution& dist, const RealType& k) + BOOST_MATH_GPU_ENABLED RealType pdf(const bernoulli_distribution& dist, const RealType& k) { // Probability Density/Mass Function. BOOST_FPU_EXCEPTION_GUARD // Error check: @@ -190,7 +197,7 @@ namespace boost } // pdf template - inline RealType cdf(const bernoulli_distribution& dist, const RealType& k) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const bernoulli_distribution& dist, const RealType& k) { // Cumulative Distribution Function Bernoulli. RealType p = dist.success_fraction(); // Error check: @@ -214,7 +221,7 @@ namespace boost } // bernoulli cdf template - inline RealType cdf(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { // Complemented Cumulative Distribution Function bernoulli. RealType const& k = c.param; bernoulli_distribution const& dist = c.dist; @@ -240,7 +247,7 @@ namespace boost } // bernoulli cdf complement template - inline RealType quantile(const bernoulli_distribution& dist, const RealType& p) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const bernoulli_distribution& dist, const RealType& p) { // Quantile or Percent Point Bernoulli function. // Return the number of expected successes k either 0 or 1. // for a given probability p. @@ -265,7 +272,7 @@ namespace boost } // quantile template - inline RealType quantile(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { // Quantile or Percent Point bernoulli function. // Return the number of expected successes k for a given // complement of the probability q. @@ -294,13 +301,13 @@ namespace boost } // quantile complemented. template - inline RealType mode(const bernoulli_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mode(const bernoulli_distribution& dist) { return static_cast((dist.success_fraction() <= 0.5) ? 0 : 1); // p = 0.5 can be 0 or 1 } template - inline RealType skewness(const bernoulli_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType skewness(const bernoulli_distribution& dist) { BOOST_MATH_STD_USING; // Aid ADL for sqrt. 
RealType p = dist.success_fraction(); @@ -308,7 +315,7 @@ namespace boost } template - inline RealType kurtosis_excess(const bernoulli_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const bernoulli_distribution& dist) { RealType p = dist.success_fraction(); // Note Wolfram says this is kurtosis in text, but gamma2 is the kurtosis excess, @@ -319,7 +326,7 @@ namespace boost } template - inline RealType kurtosis(const bernoulli_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const bernoulli_distribution& dist) { RealType p = dist.success_fraction(); return 1 / (1 - p) + 1/p -6 + 3; diff --git a/include/boost/math/distributions/beta.hpp b/include/boost/math/distributions/beta.hpp index 6c17ffa1a2..fef991a870 100644 --- a/include/boost/math/distributions/beta.hpp +++ b/include/boost/math/distributions/beta.hpp @@ -25,12 +25,15 @@ #ifndef BOOST_MATH_DIST_BETA_HPP #define BOOST_MATH_DIST_BETA_HPP +#include +#include #include #include // for beta. #include // complements. #include // error checks #include // isnan. #include // for root finding. +#include #if defined (BOOST_MSVC) # pragma warning(push) @@ -38,8 +41,6 @@ // in domain_error_imp in error_handling #endif -#include - namespace boost { namespace math @@ -48,7 +49,7 @@ namespace boost { // Common error checking routines for beta distribution functions: template - inline bool check_alpha(const char* function, const RealType& alpha, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_alpha(const char* function, const RealType& alpha, RealType* result, const Policy& pol) { if(!(boost::math::isfinite)(alpha) || (alpha <= 0)) { @@ -61,7 +62,7 @@ namespace boost } // bool check_alpha template - inline bool check_beta(const char* function, const RealType& beta, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_beta(const char* function, const RealType& beta, RealType* result, const Policy& pol) { if(!(boost::math::isfinite)(beta) || (beta <= 0)) { @@ -74,7 +75,7 @@ namespace boost } // bool check_beta template - inline bool check_prob(const char* function, const RealType& p, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_prob(const char* function, const RealType& p, RealType* result, const Policy& pol) { if((p < 0) || (p > 1) || !(boost::math::isfinite)(p)) { @@ -87,7 +88,7 @@ namespace boost } // bool check_prob template - inline bool check_x(const char* function, const RealType& x, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_x(const char* function, const RealType& x, RealType* result, const Policy& pol) { if(!(boost::math::isfinite)(x) || (x < 0) || (x > 1)) { @@ -100,28 +101,28 @@ namespace boost } // bool check_x template - inline bool check_dist(const char* function, const RealType& alpha, const RealType& beta, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* function, const RealType& alpha, const RealType& beta, RealType* result, const Policy& pol) { // Check both alpha and beta. 
return check_alpha(function, alpha, result, pol) && check_beta(function, beta, result, pol); } // bool check_dist template - inline bool check_dist_and_x(const char* function, const RealType& alpha, const RealType& beta, RealType x, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_dist_and_x(const char* function, const RealType& alpha, const RealType& beta, RealType x, RealType* result, const Policy& pol) { return check_dist(function, alpha, beta, result, pol) && beta_detail::check_x(function, x, result, pol); } // bool check_dist_and_x template - inline bool check_dist_and_prob(const char* function, const RealType& alpha, const RealType& beta, RealType p, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_dist_and_prob(const char* function, const RealType& alpha, const RealType& beta, RealType p, RealType* result, const Policy& pol) { return check_dist(function, alpha, beta, result, pol) && check_prob(function, p, result, pol); } // bool check_dist_and_prob template - inline bool check_mean(const char* function, const RealType& mean, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_mean(const char* function, const RealType& mean, RealType* result, const Policy& pol) { if(!(boost::math::isfinite)(mean) || (mean <= 0)) { @@ -133,7 +134,7 @@ namespace boost return true; } // bool check_mean template - inline bool check_variance(const char* function, const RealType& variance, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_variance(const char* function, const RealType& variance, RealType* result, const Policy& pol) { if(!(boost::math::isfinite)(variance) || (variance <= 0)) { @@ -157,7 +158,7 @@ namespace boost typedef RealType value_type; typedef Policy policy_type; - beta_distribution(RealType l_alpha = 1, RealType l_beta = 1) : m_alpha(l_alpha), m_beta(l_beta) + BOOST_MATH_GPU_ENABLED beta_distribution(RealType l_alpha = 1, RealType l_beta = 1) : m_alpha(l_alpha), m_beta(l_beta) { RealType result; beta_detail::check_dist( @@ -167,11 +168,11 @@ namespace boost &result, Policy()); } // beta_distribution constructor. // Accessor functions: - RealType alpha() const + BOOST_MATH_GPU_ENABLED RealType alpha() const { return m_alpha; } - RealType beta() const + BOOST_MATH_GPU_ENABLED RealType beta() const { // . return m_beta; } @@ -183,11 +184,11 @@ namespace boost // http://www.itl.nist.gov/div898/handbook/eda/section3/eda366h.htm // http://www.epi.ucdavis.edu/diagnostictests/betabuster.html - static RealType find_alpha( + BOOST_MATH_GPU_ENABLED static RealType find_alpha( RealType mean, // Expected value of mean. RealType variance) // Expected value of variance. { - static const char* function = "boost::math::beta_distribution<%1%>::find_alpha"; + constexpr auto function = "boost::math::beta_distribution<%1%>::find_alpha"; RealType result = 0; // of error checks. if(false == ( @@ -201,11 +202,11 @@ namespace boost return mean * (( (mean * (1 - mean)) / variance)- 1); } // RealType find_alpha - static RealType find_beta( + BOOST_MATH_GPU_ENABLED static RealType find_beta( RealType mean, // Expected value of mean. RealType variance) // Expected value of variance. { - static const char* function = "boost::math::beta_distribution<%1%>::find_beta"; + constexpr auto function = "boost::math::beta_distribution<%1%>::find_beta"; RealType result = 0; // of error checks. 
if(false == ( @@ -223,12 +224,12 @@ namespace boost // Estimate alpha & beta from either alpha or beta, and x and probability. // Uses for these parameter estimators are unclear. - static RealType find_alpha( + BOOST_MATH_GPU_ENABLED static RealType find_alpha( RealType beta, // from beta. RealType x, // x. RealType probability) // cdf { - static const char* function = "boost::math::beta_distribution<%1%>::find_alpha"; + constexpr auto function = "boost::math::beta_distribution<%1%>::find_alpha"; RealType result = 0; // of error checks. if(false == ( @@ -245,13 +246,13 @@ namespace boost return static_cast(ibeta_inva(beta, x, probability, Policy())); } // RealType find_alpha(beta, a, probability) - static RealType find_beta( + BOOST_MATH_GPU_ENABLED static RealType find_beta( // ibeta_invb(T b, T x, T p); (alpha, x, cdf,) RealType alpha, // alpha. RealType x, // probability x. RealType probability) // probability cdf. { - static const char* function = "boost::math::beta_distribution<%1%>::find_beta"; + constexpr auto function = "boost::math::beta_distribution<%1%>::find_beta"; RealType result = 0; // of error checks. if(false == ( @@ -281,27 +282,27 @@ namespace boost #endif template - inline const std::pair range(const beta_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair range(const beta_distribution& /* dist */) { // Range of permissible values for random variable x. using boost::math::tools::max_value; - return std::pair(static_cast(0), static_cast(1)); + return boost::math::pair(static_cast(0), static_cast(1)); } template - inline const std::pair support(const beta_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair support(const beta_distribution& /* dist */) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. - return std::pair(static_cast(0), static_cast(1)); + return boost::math::pair(static_cast(0), static_cast(1)); } template - inline RealType mean(const beta_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mean(const beta_distribution& dist) { // Mean of beta distribution = np. return dist.alpha() / (dist.alpha() + dist.beta()); } // mean template - inline RealType variance(const beta_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType variance(const beta_distribution& dist) { // Variance of beta distribution = np(1-p). RealType a = dist.alpha(); RealType b = dist.beta(); @@ -309,9 +310,9 @@ namespace boost } // variance template - inline RealType mode(const beta_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mode(const beta_distribution& dist) { - static const char* function = "boost::math::mode(beta_distribution<%1%> const&)"; + constexpr auto function = "boost::math::mode(beta_distribution<%1%> const&)"; RealType result; if ((dist.alpha() <= 1)) @@ -343,7 +344,7 @@ namespace boost //But WILL be provided by the derived accessor as quantile(0.5). template - inline RealType skewness(const beta_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType skewness(const beta_distribution& dist) { BOOST_MATH_STD_USING // ADL of std functions. 
RealType a = dist.alpha(); @@ -352,7 +353,7 @@ namespace boost } // skewness template - inline RealType kurtosis_excess(const beta_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const beta_distribution& dist) { RealType a = dist.alpha(); RealType b = dist.beta(); @@ -363,17 +364,17 @@ namespace boost } // kurtosis_excess template - inline RealType kurtosis(const beta_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const beta_distribution& dist) { return 3 + kurtosis_excess(dist); } // kurtosis template - inline RealType pdf(const beta_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType pdf(const beta_distribution& dist, const RealType& x) { // Probability Density/Mass Function. BOOST_FPU_EXCEPTION_GUARD - static const char* function = "boost::math::pdf(beta_distribution<%1%> const&, %1%)"; + constexpr auto function = "boost::math::pdf(beta_distribution<%1%> const&, %1%)"; BOOST_MATH_STD_USING // for ADL of std functions @@ -428,11 +429,11 @@ namespace boost } // pdf template - inline RealType cdf(const beta_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const beta_distribution& dist, const RealType& x) { // Cumulative Distribution Function beta. BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(beta_distribution<%1%> const&, %1%)"; + constexpr auto function = "boost::math::cdf(beta_distribution<%1%> const&, %1%)"; RealType a = dist.alpha(); RealType b = dist.beta(); @@ -459,12 +460,12 @@ namespace boost } // beta cdf template - inline RealType cdf(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { // Complemented Cumulative Distribution Function beta. BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(beta_distribution<%1%> const&, %1%)"; + constexpr auto function = "boost::math::cdf(beta_distribution<%1%> const&, %1%)"; RealType const& x = c.param; beta_distribution const& dist = c.dist; @@ -495,7 +496,7 @@ namespace boost } // beta cdf template - inline RealType quantile(const beta_distribution& dist, const RealType& p) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const beta_distribution& dist, const RealType& p) { // Quantile or Percent Point beta function or // Inverse Cumulative probability distribution function CDF. // Return x (0 <= x <= 1), @@ -505,7 +506,7 @@ namespace boost // will be less than or equal to that value // is whatever probability you supplied as an argument. - static const char* function = "boost::math::quantile(beta_distribution<%1%> const&, %1%)"; + constexpr auto function = "boost::math::quantile(beta_distribution<%1%> const&, %1%)"; RealType result = 0; // of argument checks: RealType a = dist.alpha(); @@ -530,12 +531,12 @@ namespace boost } // quantile template - inline RealType quantile(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { // Complement Quantile or Percent Point beta function . // Return the number of expected x for a given // complement of the probability q. 
- static const char* function = "boost::math::quantile(beta_distribution<%1%> const&, %1%)"; + constexpr auto function = "boost::math::quantile(beta_distribution<%1%> const&, %1%)"; // // Error checks: diff --git a/include/boost/math/distributions/binomial.hpp b/include/boost/math/distributions/binomial.hpp index cf7451104b..b17893e422 100644 --- a/include/boost/math/distributions/binomial.hpp +++ b/include/boost/math/distributions/binomial.hpp @@ -79,6 +79,8 @@ #ifndef BOOST_MATH_SPECIAL_BINOMIAL_HPP #define BOOST_MATH_SPECIAL_BINOMIAL_HPP +#include +#include #include #include // for incomplete beta. #include // complements @@ -100,7 +102,7 @@ namespace boost namespace binomial_detail{ // common error checking routines for binomial distribution functions: template - inline bool check_N(const char* function, const RealType& N, RealType* result, const Policy& pol) + BOOST_MATH_CUDA_ENABLED inline bool check_N(const char* function, const RealType& N, RealType* result, const Policy& pol) { if((N < 0) || !(boost::math::isfinite)(N)) { @@ -112,7 +114,7 @@ namespace boost return true; } template - inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& pol) + BOOST_MATH_CUDA_ENABLED inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& pol) { if((p < 0) || (p > 1) || !(boost::math::isfinite)(p)) { @@ -124,7 +126,7 @@ namespace boost return true; } template - inline bool check_dist(const char* function, const RealType& N, const RealType& p, RealType* result, const Policy& pol) + BOOST_MATH_CUDA_ENABLED inline bool check_dist(const char* function, const RealType& N, const RealType& p, RealType* result, const Policy& pol) { return check_success_fraction( function, p, result, pol) @@ -132,7 +134,7 @@ namespace boost function, N, result, pol); } template - inline bool check_dist_and_k(const char* function, const RealType& N, const RealType& p, RealType k, RealType* result, const Policy& pol) + BOOST_MATH_CUDA_ENABLED inline bool check_dist_and_k(const char* function, const RealType& N, const RealType& p, RealType k, RealType* result, const Policy& pol) { if(check_dist(function, N, p, result, pol) == false) return false; @@ -153,7 +155,7 @@ namespace boost return true; } template - inline bool check_dist_and_prob(const char* function, const RealType& N, RealType p, RealType prob, RealType* result, const Policy& pol) + BOOST_MATH_CUDA_ENABLED inline bool check_dist_and_prob(const char* function, const RealType& N, RealType p, RealType prob, RealType* result, const Policy& pol) { if((check_dist(function, N, p, result, pol) && detail::check_probability(function, prob, result, pol)) == false) return false; @@ -161,7 +163,7 @@ namespace boost } template - T inverse_binomial_cornish_fisher(T n, T sf, T p, T q, const Policy& pol) + BOOST_MATH_CUDA_ENABLED T inverse_binomial_cornish_fisher(T n, T sf, T p, T q, const Policy& pol) { BOOST_MATH_STD_USING // mean: @@ -196,7 +198,7 @@ namespace boost } template - RealType quantile_imp(const binomial_distribution& dist, const RealType& p, const RealType& q, bool comp) + BOOST_MATH_CUDA_ENABLED RealType quantile_imp(const binomial_distribution& dist, const RealType& p, const RealType& q, bool comp) { // Quantile or Percent Point Binomial function. // Return the number of expected successes k, // for a given probability p. @@ -290,11 +292,11 @@ namespace boost &r, Policy()); } // binomial_distribution constructor. 
- RealType success_fraction() const + BOOST_MATH_CUDA_ENABLED RealType success_fraction() const { // Probability. return m_p; } - RealType trials() const + BOOST_MATH_CUDA_ENABLED RealType trials() const { // Total number of trials. return m_n; } @@ -310,13 +312,13 @@ namespace boost // these functions are used // to obtain confidence intervals for the success fraction. // - static RealType find_lower_bound_on_p( + BOOST_MATH_CUDA_ENABLED static RealType find_lower_bound_on_p( RealType trials, RealType successes, RealType probability, interval_type t = clopper_pearson_exact_interval) { - static const char* function = "boost::math::binomial_distribution<%1%>::find_lower_bound_on_p"; + BOOST_MATH_STATIC const char* function = "boost::math::binomial_distribution<%1%>::find_lower_bound_on_p"; // Error checks: RealType result = 0; if(false == binomial_detail::check_dist_and_k( @@ -335,13 +337,13 @@ namespace boost return (t == clopper_pearson_exact_interval) ? ibeta_inv(successes, trials - successes + 1, probability, static_cast(nullptr), Policy()) : ibeta_inv(successes + 0.5f, trials - successes + 0.5f, probability, static_cast(nullptr), Policy()); } - static RealType find_upper_bound_on_p( + BOOST_MATH_CUDA_ENABLED static RealType find_upper_bound_on_p( RealType trials, RealType successes, RealType probability, interval_type t = clopper_pearson_exact_interval) { - static const char* function = "boost::math::binomial_distribution<%1%>::find_upper_bound_on_p"; + BOOST_MATH_STATIC const char* function = "boost::math::binomial_distribution<%1%>::find_upper_bound_on_p"; // Error checks: RealType result = 0; if(false == binomial_detail::check_dist_and_k( @@ -363,12 +365,12 @@ namespace boost // or // "How many trials can I have to be P% sure of seeing fewer than k events?" // - static RealType find_minimum_number_of_trials( + BOOST_MATH_CUDA_ENABLED static RealType find_minimum_number_of_trials( RealType k, // number of events RealType p, // success fraction RealType alpha) // risk level { - static const char* function = "boost::math::binomial_distribution<%1%>::find_minimum_number_of_trials"; + BOOST_MATH_STATIC const char* function = "boost::math::binomial_distribution<%1%>::find_minimum_number_of_trials"; // Error checks: RealType result = 0; if(false == binomial_detail::check_dist_and_k( @@ -382,12 +384,12 @@ namespace boost return result + k; } - static RealType find_maximum_number_of_trials( + BOOST_MATH_CUDA_ENABLED static RealType find_maximum_number_of_trials( RealType k, // number of events RealType p, // success fraction RealType alpha) // risk level { - static const char* function = "boost::math::binomial_distribution<%1%>::find_maximum_number_of_trials"; + BOOST_MATH_STATIC const char* function = "boost::math::binomial_distribution<%1%>::find_maximum_number_of_trials"; // Error checks: RealType result = 0; if(false == binomial_detail::check_dist_and_k( @@ -419,33 +421,33 @@ namespace boost #endif template - const std::pair range(const binomial_distribution& dist) + BOOST_MATH_CUDA_ENABLED const boost::math::pair range(const binomial_distribution& dist) { // Range of permissible values for random variable k. using boost::math::tools::max_value; - return std::pair(static_cast(0), dist.trials()); + return boost::math::pair(static_cast(0), dist.trials()); } template - const std::pair support(const binomial_distribution& dist) + BOOST_MATH_CUDA_ENABLED const boost::math::pair support(const binomial_distribution& dist) { // Range of supported values for random variable k. 
// This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. - return std::pair(static_cast(0), dist.trials()); + return boost::math::pair(static_cast(0), dist.trials()); } template - inline RealType mean(const binomial_distribution& dist) + BOOST_MATH_CUDA_ENABLED inline RealType mean(const binomial_distribution& dist) { // Mean of Binomial distribution = np. return dist.trials() * dist.success_fraction(); } // mean template - inline RealType variance(const binomial_distribution& dist) + BOOST_MATH_CUDA_ENABLED inline RealType variance(const binomial_distribution& dist) { // Variance of Binomial distribution = np(1-p). return dist.trials() * dist.success_fraction() * (1 - dist.success_fraction()); } // variance template - RealType pdf(const binomial_distribution& dist, const RealType& k) + BOOST_MATH_CUDA_ENABLED RealType pdf(const binomial_distribution& dist, const RealType& k) { // Probability Density/Mass Function. BOOST_FPU_EXCEPTION_GUARD @@ -501,7 +503,7 @@ namespace boost } // pdf template - inline RealType cdf(const binomial_distribution& dist, const RealType& k) + BOOST_MATH_CUDA_ENABLED inline RealType cdf(const binomial_distribution& dist, const RealType& k) { // Cumulative Distribution Function Binomial. // The random variate k is the number of successes in n trials. // k argument may be integral, signed, or unsigned, or floating point. @@ -573,7 +575,7 @@ namespace boost } // binomial cdf template - inline RealType cdf(const complemented2_type, RealType>& c) + BOOST_MATH_CUDA_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { // Complemented Cumulative Distribution Function Binomial. // The random variate k is the number of successes in n trials. // k argument may be integral, signed, or unsigned, or floating point. @@ -650,19 +652,19 @@ namespace boost } // binomial cdf template - inline RealType quantile(const binomial_distribution& dist, const RealType& p) + BOOST_MATH_CUDA_ENABLED inline RealType quantile(const binomial_distribution& dist, const RealType& p) { return binomial_detail::quantile_imp(dist, p, RealType(1-p), false); } // quantile template - RealType quantile(const complemented2_type, RealType>& c) + BOOST_MATH_CUDA_ENABLED RealType quantile(const complemented2_type, RealType>& c) { return binomial_detail::quantile_imp(c.dist, RealType(1-c.param), c.param, true); } // quantile template - inline RealType mode(const binomial_distribution& dist) + BOOST_MATH_CUDA_ENABLED inline RealType mode(const binomial_distribution& dist) { BOOST_MATH_STD_USING // ADL of std functions. RealType p = dist.success_fraction(); @@ -671,7 +673,7 @@ namespace boost } template - inline RealType median(const binomial_distribution& dist) + BOOST_MATH_CUDA_ENABLED inline RealType median(const binomial_distribution& dist) { // Bounds for the median of the negative binomial distribution // VAN DE VEN R. ; WEBER N. C. ; // Univ. Sydney, school mathematics statistics, Sydney N.S.W. 2006, AUSTRALIE @@ -689,7 +691,7 @@ namespace boost } template - inline RealType skewness(const binomial_distribution& dist) + BOOST_MATH_CUDA_ENABLED inline RealType skewness(const binomial_distribution& dist) { BOOST_MATH_STD_USING // ADL of std functions. 
      RealType p = dist.success_fraction();
@@ -698,7 +700,7 @@ namespace boost
   }

   template <class RealType, class Policy>
-  inline RealType kurtosis(const binomial_distribution<RealType, Policy>& dist)
+  BOOST_MATH_CUDA_ENABLED inline RealType kurtosis(const binomial_distribution<RealType, Policy>& dist)
   {
      RealType p = dist.success_fraction();
      RealType n = dist.trials();
@@ -706,7 +708,7 @@ namespace boost
   }

   template <class RealType, class Policy>
-  inline RealType kurtosis_excess(const binomial_distribution<RealType, Policy>& dist)
+  BOOST_MATH_CUDA_ENABLED inline RealType kurtosis_excess(const binomial_distribution<RealType, Policy>& dist)
   {
      RealType p = dist.success_fraction();
      RealType q = 1 - p;
diff --git a/include/boost/math/distributions/cauchy.hpp b/include/boost/math/distributions/cauchy.hpp
index d914cca77e..3a5af69e43 100644
--- a/include/boost/math/distributions/cauchy.hpp
+++ b/include/boost/math/distributions/cauchy.hpp
@@ -1,5 +1,6 @@
 // Copyright John Maddock 2006, 2007.
 // Copyright Paul A. Bristow 2007.
+// Copyright Matt Borland 2024.

 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0. (See accompanying file
@@ -13,12 +14,21 @@
 #pragma warning(disable : 4127) // conditional expression is constant
 #endif

-#include
+#include
+#include
+#include
+#include
 #include
 #include
 #include
+#include
+#include
+
+#ifndef BOOST_MATH_HAS_NVRTC
+#include
 #include
 #include
+#endif

 namespace boost{ namespace math
 {
@@ -30,7 +40,7 @@ namespace detail
 {

 template <class RealType, class Policy>
-RealType cdf_imp(const cauchy_distribution<RealType, Policy>& dist, const RealType& x, bool complement)
+BOOST_MATH_GPU_ENABLED RealType cdf_imp(const cauchy_distribution<RealType, Policy>& dist, const RealType& x, bool complement)
 {
    //
    // This calculates the cdf of the Cauchy distribution and/or its complement.
    //
@@ -47,14 +57,14 @@ RealType cdf_imp(const cauchy_distribution<RealType, Policy>& dist, const RealTy
    //
    // Substituting into the above we get:
    //
-   // CDF = -atan(1/x) ; x < 0
+   // CDF = -atan(1/x)/pi ; x < 0
    //
    // So the procedure is to calculate the cdf for -fabs(x)
    // using the above formula, and then subtract from 1 when required
    // to get the result.
    //
    BOOST_MATH_STD_USING // for ADL of std functions
-   static const char* function = "boost::math::cdf(cauchy<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(cauchy<%1%>&, %1%)";
    RealType result = 0;
    RealType location = dist.location();
    RealType scale = dist.scale();
@@ -66,14 +76,25 @@ RealType cdf_imp(const cauchy_distribution<RealType, Policy>& dist, const RealTy
    {
      return result;
    }
-   if(std::numeric_limits<RealType>::has_infinity && x == std::numeric_limits<RealType>::infinity())
+   #ifdef BOOST_MATH_HAS_GPU_SUPPORT
+   if(x > tools::max_value<RealType>())
+   {
+      return static_cast<RealType>((complement) ? 0 : 1);
+   }
+   if(x < -tools::max_value<RealType>())
+   {
+      return static_cast<RealType>((complement) ? 1 : 0);
+   }
+   #else
+   if(boost::math::numeric_limits<RealType>::has_infinity && x == boost::math::numeric_limits<RealType>::infinity())
    { // cdf +infinity is unity.
      return static_cast<RealType>((complement) ? 0 : 1);
    }
-   if(std::numeric_limits<RealType>::has_infinity && x == -std::numeric_limits<RealType>::infinity())
+   if(boost::math::numeric_limits<RealType>::has_infinity && x == -boost::math::numeric_limits<RealType>::infinity())
    { // cdf -infinity is zero.
      return static_cast<RealType>((complement) ? 1 : 0);
    }
+   #endif
    if(false == detail::check_x(function, x, &result, Policy()))
    { // Catches x == NaN
      return result;
    }
@@ -88,20 +109,19 @@ RealType cdf_imp(const cauchy_distribution<RealType, Policy>& dist, const RealTy
 } // cdf
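The corrected comment above (the /pi was previously missing) can be sanity-checked at x = -1, where the standard Cauchy CDF is exactly 1/4: -atan(1/x)/pi = -atan(-1)/pi = (pi/4)/pi = 1/4. A minimal host-side check:

    #include <boost/math/distributions/cauchy.hpp>
    #include <cassert>
    #include <cmath>

    int main()
    {
        boost::math::cauchy_distribution<double> dist(0.0, 1.0);
        assert(std::abs(cdf(dist, -1.0) - 0.25) < 1e-15); // CDF(-1) = 1/4
    }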
 template <class RealType, class Policy>
-RealType quantile_imp(
+BOOST_MATH_GPU_ENABLED RealType quantile_imp(
       const cauchy_distribution<RealType, Policy>& dist,
-      const RealType& p,
+      RealType p,
       bool complement)
 {
    // This routine implements the quantile for the Cauchy distribution,
    // the value p may be the probability, or its complement if complement=true.
    //
-   // The procedure first performs argument reduction on p to avoid error
-   // when calculating the tangent, then calculates the distance from the
+   // The procedure calculates the distance from the
    // mid-point of the distribution. This is either added or subtracted
    // from the location parameter depending on whether `complement` is true.
    //
-   static const char* function = "boost::math::quantile(cauchy<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(cauchy<%1%>&, %1%)";
    BOOST_MATH_STD_USING // for ADL of std functions

    RealType result = 0;
@@ -129,16 +149,15 @@ RealType quantile_imp(
      return (complement ? 1 : -1) * policies::raise_overflow_error<RealType>(function, 0, Policy());
    }

-   RealType P = p - floor(p);   // argument reduction of p:
-   if(P > 0.5)
+   if(p > 0.5)
    {
-     P = P - 1;
+     p = p - 1;
    }
-   if(P == 0.5)   // special case:
+   if(p == 0.5)   // special case:
    {
      return location;
    }
-   result = -scale / tan(constants::pi<RealType>() * P);
+   result = -scale / tan(constants::pi<RealType>() * p);
    return complement ? RealType(location - result) : RealType(location + result);
 } // quantile
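The old argument reduction (P = p - floor(p)) was redundant: p has already been validated as a probability in [0, 1] by the checks above, and the 0 and 1 endpoints are handled separately as overflows. Shifting p down by 1 when p > 0.5 keeps the tangent argument inside (-pi/2, pi/2), where tan is well conditioned, and the resulting quantile is location - scale/tan(pi * p). Quick check: for any Cauchy, Q(0.75) = location + scale:

    #include <boost/math/distributions/cauchy.hpp>
    #include <cassert>
    #include <cmath>

    int main()
    {
        boost::math::cauchy_distribution<double> dist(2.0, 3.0);
        assert(std::abs(quantile(dist, 0.75) - 5.0) < 1e-12); // 2 + 3
    }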
@@ -151,20 +170,20 @@ class cauchy_distribution
    typedef RealType value_type;
    typedef Policy policy_type;

-   cauchy_distribution(RealType l_location = 0, RealType l_scale = 1)
+   BOOST_MATH_GPU_ENABLED cauchy_distribution(RealType l_location = 0, RealType l_scale = 1)
      : m_a(l_location), m_hg(l_scale)
    {
-    static const char* function = "boost::math::cauchy_distribution<%1%>::cauchy_distribution";
+    constexpr auto function = "boost::math::cauchy_distribution<%1%>::cauchy_distribution";
     RealType result;
     detail::check_location(function, l_location, &result, Policy());
     detail::check_scale(function, l_scale, &result, Policy());
   } // cauchy_distribution

-   RealType location()const
+   BOOST_MATH_GPU_ENABLED RealType location()const
   {
      return m_a;
   }
-   RealType scale()const
+   BOOST_MATH_GPU_ENABLED RealType scale()const
   {
      return m_hg;
   }
@@ -184,48 +203,48 @@ cauchy_distribution(RealType,RealType)->cauchy_distribution

 template <class RealType, class Policy>
-inline const std::pair<RealType, RealType> range(const cauchy_distribution<RealType, Policy>&)
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const cauchy_distribution<RealType, Policy>&)
 { // Range of permissible values for random variable x.
-  if (std::numeric_limits<RealType>::has_infinity)
+  BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
  {
-    return std::pair<RealType, RealType>(-std::numeric_limits<RealType>::infinity(), std::numeric_limits<RealType>::infinity()); // - to + infinity.
+    return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
  }
  else
  { // Can only use max_value.
    using boost::math::tools::max_value;
-    return std::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max.
+    return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max.
  }
 }

 template <class RealType, class Policy>
-inline const std::pair<RealType, RealType> support(const cauchy_distribution<RealType, Policy>& )
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const cauchy_distribution<RealType, Policy>& )
 { // Range of supported values for random variable x.
   // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
-  if (std::numeric_limits<RealType>::has_infinity)
+  BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
  {
-    return std::pair<RealType, RealType>(-std::numeric_limits<RealType>::infinity(), std::numeric_limits<RealType>::infinity()); // - to + infinity.
+    return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
  }
  else
  { // Can only use max_value.
    using boost::math::tools::max_value;
-    return std::pair<RealType, RealType>(-tools::max_value<RealType>(), max_value<RealType>()); // - to + max.
+    return boost::math::pair<RealType, RealType>(-tools::max_value<RealType>(), max_value<RealType>()); // - to + max.
  }
 }

 template <class RealType, class Policy>
-inline RealType pdf(const cauchy_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const cauchy_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING  // for ADL of std functions
-   static const char* function = "boost::math::pdf(cauchy<%1%>&, %1%)";
+   constexpr auto function = "boost::math::pdf(cauchy<%1%>&, %1%)";
    RealType result = 0;
    RealType location = dist.location();
    RealType scale = dist.scale();
-   if(false == detail::check_scale("boost::math::pdf(cauchy<%1%>&, %1%)", scale, &result, Policy()))
+   if(false == detail::check_scale(function, scale, &result, Policy()))
    {
      return result;
    }
-   if(false == detail::check_location("boost::math::pdf(cauchy<%1%>&, %1%)", location, &result, Policy()))
+   if(false == detail::check_location(function, location, &result, Policy()))
    {
      return result;
    }
@@ -234,7 +253,7 @@ inline RealType pdf(const cauchy_distribution<RealType, Policy>& dist, const Rea
      return 0; // pdf + and - infinity is zero.
    }
    // These produce MSVC 4127 warnings, so the above used instead.
-   //if(std::numeric_limits<RealType>::has_infinity && abs(x) == std::numeric_limits<RealType>::infinity())
+   //if(boost::math::numeric_limits<RealType>::has_infinity && abs(x) == boost::math::numeric_limits<RealType>::infinity())
    //{ // pdf + and - infinity is zero.
    // return 0;
    //}
@@ -250,111 +269,112 @@ inline RealType pdf(const cauchy_distribution<RealType, Policy>& dist, const Rea
 } // pdf

 template <class RealType, class Policy>
-inline RealType cdf(const cauchy_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const cauchy_distribution<RealType, Policy>& dist, const RealType& x)
 {
    return detail::cdf_imp(dist, x, false);
 } // cdf

 template <class RealType, class Policy>
-inline RealType quantile(const cauchy_distribution<RealType, Policy>& dist, const RealType& p)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const cauchy_distribution<RealType, Policy>& dist, const RealType& p)
 {
    return detail::quantile_imp(dist, p, false);
 } // quantile

 template <class RealType, class Policy>
-inline RealType cdf(const complemented2_type<cauchy_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<cauchy_distribution<RealType, Policy>, RealType>& c)
 {
    return detail::cdf_imp(c.dist, c.param, true);
 } // cdf complement

 template <class RealType, class Policy>
-inline RealType quantile(const complemented2_type<cauchy_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<cauchy_distribution<RealType, Policy>, RealType>& c)
 {
    return detail::quantile_imp(c.dist, c.param, true);
 } // quantile complement

 template <class RealType, class Policy>
-inline RealType mean(const cauchy_distribution<RealType, Policy>&)
+BOOST_MATH_GPU_ENABLED inline RealType mean(const cauchy_distribution<RealType, Policy>&)
 {  // There is no mean:
    typedef typename Policy::assert_undefined_type assert_type;
-   static_assert(assert_type::value == 0, "assert type is undefined");
+   static_assert(assert_type::value == 0, "The Cauchy Distribution has no mean");

    return policies::raise_domain_error<RealType>(
      "boost::math::mean(cauchy<%1%>&)",
      "The Cauchy distribution does not have a mean: "
      "the only possible return value is %1%.",
-     std::numeric_limits<RealType>::quiet_NaN(), Policy());
+     boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
 }

 template <class RealType, class Policy>
-inline RealType variance(const cauchy_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType variance(const cauchy_distribution<RealType, Policy>& /*dist*/)
 {
    // There is no variance:
    typedef typename Policy::assert_undefined_type assert_type;
-   static_assert(assert_type::value == 0, "assert type is undefined");
+   static_assert(assert_type::value == 0, "The Cauchy Distribution has no variance");

    return policies::raise_domain_error<RealType>(
      "boost::math::variance(cauchy<%1%>&)",
      "The Cauchy distribution does not have a variance: "
      "the only possible return value is %1%.",
-     std::numeric_limits<RealType>::quiet_NaN(), Policy());
+     boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
 }

 template <class RealType, class Policy>
-inline RealType mode(const cauchy_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mode(const cauchy_distribution<RealType, Policy>& dist)
 {
    return dist.location();
 }

 template <class RealType, class Policy>
-inline RealType median(const cauchy_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType median(const cauchy_distribution<RealType, Policy>& dist)
 {
    return dist.location();
 }
+
 template <class RealType, class Policy>
-inline RealType skewness(const cauchy_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const cauchy_distribution<RealType, Policy>& /*dist*/)
 {
    // There is no skewness:
    typedef typename Policy::assert_undefined_type assert_type;
-   static_assert(assert_type::value == 0, "assert type is undefined");
+   static_assert(assert_type::value == 0, "The Cauchy Distribution has no skewness");

    return policies::raise_domain_error<RealType>(
      "boost::math::skewness(cauchy<%1%>&)",
      "The Cauchy distribution does not have a skewness: "
      "the only possible return value is %1%.",
-     std::numeric_limits<RealType>::quiet_NaN(), Policy()); // infinity?
+     boost::math::numeric_limits<RealType>::quiet_NaN(), Policy()); // infinity?
 }

 template <class RealType, class Policy>
-inline RealType kurtosis(const cauchy_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const cauchy_distribution<RealType, Policy>& /*dist*/)
 {
    // There is no kurtosis:
    typedef typename Policy::assert_undefined_type assert_type;
-   static_assert(assert_type::value == 0, "assert type is undefined");
+   static_assert(assert_type::value == 0, "The Cauchy Distribution has no kurtosis");

    return policies::raise_domain_error<RealType>(
      "boost::math::kurtosis(cauchy<%1%>&)",
      "The Cauchy distribution does not have a kurtosis: "
      "the only possible return value is %1%.",
-     std::numeric_limits<RealType>::quiet_NaN(), Policy());
+     boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
 }

 template <class RealType, class Policy>
-inline RealType kurtosis_excess(const cauchy_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const cauchy_distribution<RealType, Policy>& /*dist*/)
 {
    // There is no kurtosis excess:
    typedef typename Policy::assert_undefined_type assert_type;
-   static_assert(assert_type::value == 0, "assert type is undefined");
+   static_assert(assert_type::value == 0, "The Cauchy Distribution has no kurtosis excess");

    return policies::raise_domain_error<RealType>(
      "boost::math::kurtosis_excess(cauchy<%1%>&)",
      "The Cauchy distribution does not have a kurtosis: "
      "the only possible return value is %1%.",
-     std::numeric_limits<RealType>::quiet_NaN(), Policy());
+     boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
 }

 template <class RealType, class Policy>
-inline RealType entropy(const cauchy_distribution<RealType, Policy> & dist)
+BOOST_MATH_GPU_ENABLED inline RealType entropy(const cauchy_distribution<RealType, Policy> & dist)
 {
    using std::log;
    return log(2*constants::two_pi<RealType>()*dist.scale());
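Since two_pi is 2*pi, the entropy returned above is log(4*pi*scale), the known differential entropy of the Cauchy distribution; for scale = 1 this is log(4*pi), roughly 2.531. A quick check:

    #include <boost/math/constants/constants.hpp>
    #include <boost/math/distributions/cauchy.hpp>
    #include <cassert>
    #include <cmath>

    int main()
    {
        boost::math::cauchy_distribution<double> dist(0.0, 1.0);
        const double expected = std::log(4.0 * boost::math::constants::pi<double>());
        assert(std::abs(entropy(dist) - expected) < 1e-12);
    }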
diff --git a/include/boost/math/distributions/chi_squared.hpp b/include/boost/math/distributions/chi_squared.hpp
index f5daddc0ad..3944569e89 100644
--- a/include/boost/math/distributions/chi_squared.hpp
+++ b/include/boost/math/distributions/chi_squared.hpp
@@ -9,14 +9,17 @@
 #ifndef BOOST_MATH_DISTRIBUTIONS_CHI_SQUARED_HPP
 #define BOOST_MATH_DISTRIBUTIONS_CHI_SQUARED_HPP

+#include
+#include
+#include
+#include
+#include
 #include
 #include // for incomplete beta.
 #include // complements
 #include // error checks
 #include

-#include
-
 namespace boost{ namespace math{

 template <class RealType = double, class Policy = policies::policy<> >
@@ -26,20 +29,20 @@ class chi_squared_distribution
    using value_type = RealType;
    using policy_type = Policy;

-   explicit chi_squared_distribution(RealType i) : m_df(i)
+   BOOST_MATH_GPU_ENABLED explicit chi_squared_distribution(RealType i) : m_df(i)
    {
       RealType result;
       detail::check_df(
          "boost::math::chi_squared_distribution<%1%>::chi_squared_distribution", m_df, &result, Policy());
    } // chi_squared_distribution

-   RealType degrees_of_freedom()const
+   BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom()const
    {
      return m_df;
    }

    // Parameter estimation:
-   static RealType find_degrees_of_freedom(
+   BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(
       RealType difference_from_variance,
       RealType alpha,
       RealType beta,
@@ -66,16 +69,16 @@ chi_squared_distribution(RealType)->chi_squared_distribution

 template <class RealType, class Policy>
-inline std::pair<RealType, RealType> range(const chi_squared_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> range(const chi_squared_distribution<RealType, Policy>& /*dist*/)
 { // Range of permissible values for random variable x.
-  if (std::numeric_limits<RealType>::has_infinity)
+  BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
  {
-    return std::pair<RealType, RealType>(static_cast<RealType>(0), std::numeric_limits<RealType>::infinity()); // 0 to + infinity.
+    return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), boost::math::numeric_limits<RealType>::infinity()); // 0 to + infinity.
  }
  else
  {
    using boost::math::tools::max_value;
-    return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // 0 to + max.
+    return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // 0 to + max.
  }
 }
@@ -84,21 +87,21 @@ inline std::pair range(const chi_squared_distribution

 template <class RealType, class Policy>
-inline std::pair<RealType, RealType> support(const chi_squared_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> support(const chi_squared_distribution<RealType, Policy>& /*dist*/)
 { // Range of supported values for random variable x.
   // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
-  return std::pair<RealType, RealType>(static_cast<RealType>(0), tools::max_value<RealType>()); // 0 to + infinity.
+  return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), tools::max_value<RealType>()); // 0 to + infinity.
 }

 template <class RealType, class Policy>
-RealType pdf(const chi_squared_distribution<RealType, Policy>& dist, const RealType& chi_square)
+BOOST_MATH_GPU_ENABLED RealType pdf(const chi_squared_distribution<RealType, Policy>& dist, const RealType& chi_square)
 {
    BOOST_MATH_STD_USING  // for ADL of std functions
    RealType degrees_of_freedom = dist.degrees_of_freedom();
    // Error check:
    RealType error_result;
-   static const char* function = "boost::math::pdf(const chi_squared_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::pdf(const chi_squared_distribution<%1%>&, %1%)";
    if(false == detail::check_df(
          function, degrees_of_freedom, &error_result, Policy()))
@@ -132,12 +135,12 @@ RealType pdf(const chi_squared_distribution<RealType, Policy>& dist, const RealT
 } // pdf

 template <class RealType, class Policy>
-inline RealType cdf(const chi_squared_distribution<RealType, Policy>& dist, const RealType& chi_square)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const chi_squared_distribution<RealType, Policy>& dist, const RealType& chi_square)
 {
    RealType degrees_of_freedom = dist.degrees_of_freedom();
    // Error check:
    RealType error_result;
-   static const char* function = "boost::math::cdf(const chi_squared_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(const chi_squared_distribution<%1%>&, %1%)";
    if(false == detail::check_df(
          function, degrees_of_freedom, &error_result, Policy()))
@@ -153,10 +156,10 @@ inline RealType cdf(const chi_squared_distribution<RealType, Policy>& dist, cons
 } // cdf

 template <class RealType, class Policy>
-inline RealType quantile(const chi_squared_distribution<RealType, Policy>& dist, const RealType& p)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const chi_squared_distribution<RealType, Policy>& dist, const RealType& p)
 {
    RealType degrees_of_freedom = dist.degrees_of_freedom();
-   static const char* function = "boost::math::quantile(const chi_squared_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const chi_squared_distribution<%1%>&, %1%)";
    // Error check:
    RealType error_result;
    if(false ==
@@ -170,11 +173,11 @@ inline RealType quantile(const chi_squared_distribution<RealType, Policy>& dist,
 } // quantile

 template <class RealType, class Policy>
-inline RealType cdf(const complemented2_type<chi_squared_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<chi_squared_distribution<RealType, Policy>, RealType>& c)
 {
    RealType const& degrees_of_freedom = c.dist.degrees_of_freedom();
    RealType const& chi_square = c.param;
-   static const char* function = "boost::math::cdf(const chi_squared_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(const chi_squared_distribution<%1%>&, %1%)";
    // Error check:
    RealType error_result;
    if(false == detail::check_df(
@@ -191,11 +194,11 @@ inline RealType cdf(const complemented2_type
 }

 template <class RealType, class Policy>
-inline RealType quantile(const complemented2_type<chi_squared_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<chi_squared_distribution<RealType, Policy>, RealType>& c)
 {
    RealType const& degrees_of_freedom = c.dist.degrees_of_freedom();
    RealType const& q = c.param;
function = "boost::math::quantile(const chi_squared_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const chi_squared_distribution<%1%>&, %1%)"; // Error check: RealType error_result; if(false == ( @@ -208,22 +211,22 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const chi_squared_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mean(const chi_squared_distribution& dist) { // Mean of Chi-Squared distribution = v. return dist.degrees_of_freedom(); } // mean template -inline RealType variance(const chi_squared_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType variance(const chi_squared_distribution& dist) { // Variance of Chi-Squared distribution = 2v. return 2 * dist.degrees_of_freedom(); } // variance template -inline RealType mode(const chi_squared_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mode(const chi_squared_distribution& dist) { RealType df = dist.degrees_of_freedom(); - static const char* function = "boost::math::mode(const chi_squared_distribution<%1%>&)"; + constexpr auto function = "boost::math::mode(const chi_squared_distribution<%1%>&)"; if(df < 2) return policies::raise_domain_error( @@ -234,7 +237,7 @@ inline RealType mode(const chi_squared_distribution& dist) } template -inline RealType skewness(const chi_squared_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType skewness(const chi_squared_distribution& dist) { BOOST_MATH_STD_USING // For ADL RealType df = dist.degrees_of_freedom(); @@ -242,14 +245,14 @@ inline RealType skewness(const chi_squared_distribution& dist) } template -inline RealType kurtosis(const chi_squared_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const chi_squared_distribution& dist) { RealType df = dist.degrees_of_freedom(); return 3 + 12 / df; } template -inline RealType kurtosis_excess(const chi_squared_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const chi_squared_distribution& dist) { RealType df = dist.degrees_of_freedom(); return 12 / df; @@ -264,12 +267,12 @@ namespace detail template struct df_estimator { - df_estimator(RealType a, RealType b, RealType variance, RealType delta) + BOOST_MATH_GPU_ENABLED df_estimator(RealType a, RealType b, RealType variance, RealType delta) : alpha(a), beta(b), ratio(delta/variance) { // Constructor } - RealType operator()(const RealType& df) + BOOST_MATH_GPU_ENABLED RealType operator()(const RealType& df) { if(df <= tools::min_value()) return 1; @@ -297,14 +300,14 @@ struct df_estimator } // namespace detail template -RealType chi_squared_distribution::find_degrees_of_freedom( +BOOST_MATH_GPU_ENABLED RealType chi_squared_distribution::find_degrees_of_freedom( RealType difference_from_variance, RealType alpha, RealType beta, RealType variance, RealType hint) { - static const char* function = "boost::math::chi_squared_distribution<%1%>::find_degrees_of_freedom(%1%,%1%,%1%,%1%,%1%)"; + constexpr auto function = "boost::math::chi_squared_distribution<%1%>::find_degrees_of_freedom(%1%,%1%,%1%,%1%,%1%)"; // Check for domain errors: RealType error_result; if(false == @@ -321,8 +324,8 @@ RealType chi_squared_distribution::find_degrees_of_freedom( detail::df_estimator f(alpha, beta, variance, difference_from_variance); tools::eps_tolerance tol(policies::digits()); - std::uintmax_t max_iter = policies::get_max_root_iterations(); - std::pair r = + boost::math::uintmax_t max_iter = policies::get_max_root_iterations(); + boost::math::pair r = 
diff --git a/include/boost/math/distributions/complement.hpp b/include/boost/math/distributions/complement.hpp
index 5c062a7cdf..c63b8a5041 100644
--- a/include/boost/math/distributions/complement.hpp
+++ b/include/boost/math/distributions/complement.hpp
@@ -1,5 +1,6 @@
 //  (C) Copyright John Maddock 2006.
 //  (C) Copyright Paul A. Bristow 2006.
+//  (C) Copyright Matt Borland 2024
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -7,6 +8,8 @@
 #ifndef BOOST_STATS_COMPLEMENT_HPP
 #define BOOST_STATS_COMPLEMENT_HPP

+#include
+
 //
 // This code really defines our own tuple type.
 // It would be nice to reuse boost::math::tuple
@@ -19,7 +22,7 @@ namespace boost{ namespace math{

 template <class Dist, class RealType>
 struct complemented2_type
 {
-   complemented2_type(
+   BOOST_MATH_GPU_ENABLED complemented2_type(
       const Dist& d,
       const RealType& p1)
      : dist(d),
@@ -35,7 +38,7 @@ struct complemented2_type

 template <class Dist, class RealType1, class RealType2>
 struct complemented3_type
 {
-   complemented3_type(
+   BOOST_MATH_GPU_ENABLED complemented3_type(
      const Dist& d,
      const RealType1& p1,
      const RealType2& p2)
@@ -53,7 +56,7 @@ struct complemented3_type

 template <class Dist, class RealType1, class RealType2, class RealType3>
 struct complemented4_type
 {
-   complemented4_type(
+   BOOST_MATH_GPU_ENABLED complemented4_type(
      const Dist& d,
      const RealType1& p1,
      const RealType2& p2,
@@ -74,7 +77,7 @@ struct complemented4_type

 template <class Dist, class RealType1, class RealType2, class RealType3, class RealType4>
 struct complemented5_type
 {
-   complemented5_type(
+   BOOST_MATH_GPU_ENABLED complemented5_type(
      const Dist& d,
      const RealType1& p1,
      const RealType2& p2,
@@ -98,7 +101,7 @@ struct complemented5_type

 template <class Dist, class RealType1, class RealType2, class RealType3, class RealType4, class RealType5>
 struct complemented6_type
 {
-   complemented6_type(
+   BOOST_MATH_GPU_ENABLED complemented6_type(
      const Dist& d,
      const RealType1& p1,
      const RealType2& p2,
@@ -125,7 +128,7 @@ struct complemented6_type

 template <class Dist, class RealType1, class RealType2, class RealType3, class RealType4, class RealType5, class RealType6>
 struct complemented7_type
 {
-   complemented7_type(
+   BOOST_MATH_GPU_ENABLED complemented7_type(
      const Dist& d,
      const RealType1& p1,
      const RealType2& p2,
@@ -153,37 +156,37 @@ struct complemented7_type
 };

 template <class Dist, class RealType>
-inline complemented2_type<Dist, RealType> complement(const Dist& d, const RealType& r)
+BOOST_MATH_GPU_ENABLED inline complemented2_type<Dist, RealType> complement(const Dist& d, const RealType& r)
 {
    return complemented2_type<Dist, RealType>(d, r);
 }

 template <class Dist, class RealType1, class RealType2>
-inline complemented3_type<Dist, RealType1, RealType2> complement(const Dist& d, const RealType1& r1, const RealType2& r2)
+BOOST_MATH_GPU_ENABLED inline complemented3_type<Dist, RealType1, RealType2> complement(const Dist& d, const RealType1& r1, const RealType2& r2)
 {
    return complemented3_type<Dist, RealType1, RealType2>(d, r1, r2);
 }

 template <class Dist, class RealType1, class RealType2, class RealType3>
-inline complemented4_type<Dist, RealType1, RealType2, RealType3> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3)
+BOOST_MATH_GPU_ENABLED inline complemented4_type<Dist, RealType1, RealType2, RealType3> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3)
 {
    return complemented4_type<Dist, RealType1, RealType2, RealType3>(d, r1, r2, r3);
 }

 template <class Dist, class RealType1, class RealType2, class RealType3, class RealType4>
-inline complemented5_type<Dist, RealType1, RealType2, RealType3, RealType4> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4)
+BOOST_MATH_GPU_ENABLED inline complemented5_type<Dist, RealType1, RealType2, RealType3, RealType4> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4)
 {
    return complemented5_type<Dist, RealType1, RealType2, RealType3, RealType4>(d, r1, r2, r3, r4);
 }

 template <class Dist, class RealType1, class RealType2, class RealType3, class RealType4, class RealType5>
-inline complemented6_type<Dist, RealType1, RealType2, RealType3, RealType4, RealType5> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4, const RealType5& r5)
+BOOST_MATH_GPU_ENABLED inline complemented6_type<Dist, RealType1, RealType2, RealType3, RealType4, RealType5> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4, const RealType5& r5)
 {
    return complemented6_type<Dist, RealType1, RealType2, RealType3, RealType4, RealType5>(d, r1, r2, r3, r4, r5);
 }

 template <class Dist, class RealType1, class RealType2, class RealType3, class RealType4, class RealType5, class RealType6>
-inline complemented7_type<Dist, RealType1, RealType2, RealType3, RealType4, RealType5, RealType6> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4, const RealType5& r5, const RealType6& r6)
+BOOST_MATH_GPU_ENABLED inline complemented7_type<Dist, RealType1, RealType2, RealType3, RealType4, RealType5, RealType6> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4, const RealType5& r5, const RealType6& r6)
 {
    return complemented7_type<Dist, RealType1, RealType2, RealType3, RealType4, RealType5, RealType6>(d, r1, r2, r3, r4, r5, r6);
 }
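The complement() helpers exist so that upper-tail probabilities can be written cdf(complement(dist, x)) and computed internally without the cancellation error of 1 - cdf(dist, x). For example, deep in the upper tail of an exponential distribution:

    #include <boost/math/distributions/exponential.hpp>
    #include <iostream>

    int main()
    {
        boost::math::exponential_distribution<double> dist(2.0); // lambda = 2
        // P(X > 20) = exp(-40), about 4.25e-18: far below double epsilon,
        // so 1 - cdf(dist, 20.0) would round away all precision, while
        // cdf(complement(dist, 20.0)) returns it accurately.
        std::cout << cdf(complement(dist, 20.0)) << '\n';
    }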
diff --git a/include/boost/math/distributions/detail/common_error_handling.hpp b/include/boost/math/distributions/detail/common_error_handling.hpp
index f03f2c49b8..06e3c105bd 100644
--- a/include/boost/math/distributions/detail/common_error_handling.hpp
+++ b/include/boost/math/distributions/detail/common_error_handling.hpp
@@ -1,5 +1,6 @@
 // Copyright John Maddock 2006, 2007.
 // Copyright Paul A. Bristow 2006, 2007, 2012.
+// Copyright Matt Borland 2024

 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
@@ -9,6 +10,8 @@
 #ifndef BOOST_MATH_DISTRIBUTIONS_COMMON_ERROR_HANDLING_HPP
 #define BOOST_MATH_DISTRIBUTIONS_COMMON_ERROR_HANDLING_HPP

+#include
+#include
 #include
 #include // using boost::math::isfinite;
@@ -23,7 +26,7 @@ namespace boost{ namespace math{ namespace detail
 {

 template <class RealType, class Policy>
-inline bool check_probability(const char* function, RealType const& prob, RealType* result, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline bool check_probability(const char* function, RealType const& prob, RealType* result, const Policy& pol)
 {
    if((prob < 0) || (prob > 1) || !(boost::math::isfinite)(prob))
    {
@@ -36,7 +39,7 @@ inline bool check_probability(const char* function, RealType const& prob, RealTy
 }

 template <class RealType, class Policy>
-inline bool check_df(const char* function, RealType const& df, RealType* result, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline bool check_df(const char* function, RealType const& df, RealType* result, const Policy& pol)
 { // df > 0 but NOT +infinity allowed.
    if((df <= 0) || !(boost::math::isfinite)(df))
    {
@@ -49,7 +52,7 @@ inline bool check_df(const char* function, RealType const& df, RealType* result,
 }

 template <class RealType, class Policy>
-inline bool check_df_gt0_to_inf(const char* function, RealType const& df, RealType* result, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline bool check_df_gt0_to_inf(const char* function, RealType const& df, RealType* result, const Policy& pol)
 { // df > 0 or +infinity are allowed.
    if( (df <= 0) || (boost::math::isnan)(df) )
    { // is bad df <= 0 or NaN or -infinity.
@@ -63,7 +66,7 @@ inline bool check_df_gt0_to_inf(const char* function, RealType const& df, RealTy

 template <class RealType, class Policy>
-inline bool check_scale(
+BOOST_MATH_GPU_ENABLED inline bool check_scale(
    const char* function,
    RealType scale,
    RealType* result,
@@ -80,7 +83,7 @@ inline bool check_scale(
 }

 template <class RealType, class Policy>
-inline bool check_location(
+BOOST_MATH_GPU_ENABLED inline bool check_location(
    const char* function,
    RealType location,
    RealType* result,
@@ -97,7 +100,7 @@ inline bool check_location(
 }

 template <class RealType, class Policy>
-inline bool check_x(
+BOOST_MATH_GPU_ENABLED inline bool check_x(
    const char* function,
    RealType x,
    RealType* result,
@@ -118,7 +121,7 @@ inline bool check_x(
 } // bool check_x

 template <class RealType, class Policy>
-inline bool check_x_not_NaN(
+BOOST_MATH_GPU_ENABLED inline bool check_x_not_NaN(
    const char* function,
    RealType x,
    RealType* result,
@@ -138,7 +141,7 @@ inline bool check_x_not_NaN(
 } // bool check_x_not_NaN

 template <class RealType, class Policy>
-inline bool check_x_gt0(
+BOOST_MATH_GPU_ENABLED inline bool check_x_gt0(
    const char* function,
    RealType x,
    RealType* result,
@@ -159,7 +162,7 @@ inline bool check_x_gt0(
 } // bool check_x_gt0

 template <class RealType, class Policy>
-inline bool check_positive_x(
+BOOST_MATH_GPU_ENABLED inline bool check_positive_x(
    const char* function,
    RealType x,
    RealType* result,
@@ -179,13 +182,14 @@ inline bool check_positive_x(
 }

 template <class RealType, class Policy>
-inline bool check_non_centrality(
+BOOST_MATH_GPU_ENABLED inline bool check_non_centrality(
    const char* function,
    RealType ncp,
    RealType* result,
    const Policy& pol)
 {
-   static const RealType upper_limit = static_cast<RealType>((std::numeric_limits<long long>::max)()) - boost::math::policies::get_max_root_iterations<Policy>();
+   BOOST_MATH_STATIC const RealType upper_limit = static_cast<RealType>((boost::math::numeric_limits<long long>::max)()) - boost::math::policies::get_max_root_iterations<Policy>();
+
    if((ncp < 0) || !(boost::math::isfinite)(ncp) || ncp > upper_limit)
    {
       *result = policies::raise_domain_error<RealType>(
@@ -197,7 +201,7 @@ inline bool check_non_centrality(
 }

 template <class RealType, class Policy>
-inline bool check_finite(
+BOOST_MATH_GPU_ENABLED inline bool check_finite(
    const char* function,
    RealType x,
    RealType* result,
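All of these checkers share one calling convention: return true when the argument is usable, otherwise invoke the error policy, write the resulting error value through *result, and return false so the caller can bail out with a single if. An illustrative stand-in for the pattern (not the library's code; check_scale_like is hypothetical):

    #include <cmath>
    #include <limits>

    // Returns true on success; on failure writes NaN through *result.
    template <class RealType>
    bool check_scale_like(RealType scale, RealType* result)
    {
        if(!(scale > 0) || !std::isfinite(scale))
        {
            *result = std::numeric_limits<RealType>::quiet_NaN();
            return false; // caller does: if(false == check...) return result;
        }
        return true;
    }

    int main()
    {
        double r = 0;
        return check_scale_like(-1.0, &r) ? 0 : 1; // -1 is rejected
    }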
diff --git a/include/boost/math/distributions/detail/derived_accessors.hpp b/include/boost/math/distributions/detail/derived_accessors.hpp
index eb76409a1c..90679ef21f 100644
--- a/include/boost/math/distributions/detail/derived_accessors.hpp
+++ b/include/boost/math/distributions/detail/derived_accessors.hpp
@@ -1,4 +1,5 @@
 //  Copyright John Maddock 2006.
+//  Copyright Matt Borland 2024.
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -27,9 +28,13 @@
 // can find the definitions referred to herein.
 //

-#include
+#include
 #include

+#ifndef BOOST_MATH_HAS_NVRTC
+#include
+#endif
+
 #ifdef _MSC_VER
 # pragma warning(push)
 # pragma warning(disable: 4723) // potential divide by 0
@@ -39,24 +44,24 @@ namespace boost{ namespace math{

 template <class Distribution>
-typename Distribution::value_type variance(const Distribution& dist);
+BOOST_MATH_GPU_ENABLED typename Distribution::value_type variance(const Distribution& dist);

 template <class Distribution>
-inline typename Distribution::value_type standard_deviation(const Distribution& dist)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type standard_deviation(const Distribution& dist)
 {
    BOOST_MATH_STD_USING  // ADL of sqrt.
    return sqrt(variance(dist));
 }

 template <class Distribution>
-inline typename Distribution::value_type variance(const Distribution& dist)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type variance(const Distribution& dist)
 {
    typename Distribution::value_type result = standard_deviation(dist);
    return result * result;
 }

 template <class Distribution, class RealType>
-inline typename Distribution::value_type hazard(const Distribution& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type hazard(const Distribution& dist, const RealType& x)
 { // hazard function
   // http://www.itl.nist.gov/div898/handbook/eda/section3/eda362.htm#HAZ
    typedef typename Distribution::value_type value_type;
@@ -75,7 +80,7 @@ inline typename Distribution::value_type hazard(const Distribution& dist, const
 }

 template <class Distribution, class RealType>
-inline typename Distribution::value_type chf(const Distribution& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type chf(const Distribution& dist, const RealType& x)
 { // cumulative hazard function.
   // http://www.itl.nist.gov/div898/handbook/eda/section3/eda362.htm#HAZ
    BOOST_MATH_STD_USING
@@ -83,7 +88,7 @@ inline typename Distribution::value_type chf(const Distribution& dist, const Rea
 }

 template <class Distribution>
-inline typename Distribution::value_type coefficient_of_variation(const Distribution& dist)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type coefficient_of_variation(const Distribution& dist)
 {
    typedef typename Distribution::value_type value_type;
    typedef typename Distribution::policy_type policy_type;
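For reference, the derived accessors above implement the hazard h(x) = pdf(x) / (1 - cdf(x)) and the cumulative hazard chf(x) = -log(1 - cdf(x)). The exponential distribution makes a convenient check, since its hazard is the constant lambda and its cumulative hazard is lambda*x:

    #include <boost/math/distributions/exponential.hpp>
    #include <cassert>
    #include <cmath>

    int main()
    {
        boost::math::exponential_distribution<double> dist(2.0);
        assert(std::abs(hazard(dist, 1.5) - 2.0) < 1e-12); // h(x) = lambda
        assert(std::abs(chf(dist, 1.5) - 3.0) < 1e-12);    // H(x) = lambda * x
    }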
@@ -104,33 +109,33 @@ inline typename Distribution::value_type coefficient_of_variation(const Distribu
 // implementation with all arguments of the same type:
 //
 template <class Distribution, class RealType>
-inline typename Distribution::value_type pdf(const Distribution& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type pdf(const Distribution& dist, const RealType& x)
 {
    typedef typename Distribution::value_type value_type;
    return pdf(dist, static_cast<value_type>(x));
 }
 template <class Distribution, class RealType>
-inline typename Distribution::value_type logpdf(const Distribution& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type logpdf(const Distribution& dist, const RealType& x)
 {
    using std::log;
    typedef typename Distribution::value_type value_type;
    return log(pdf(dist, static_cast<value_type>(x)));
 }
 template <class Distribution, class RealType>
-inline typename Distribution::value_type cdf(const Distribution& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type cdf(const Distribution& dist, const RealType& x)
 {
    typedef typename Distribution::value_type value_type;
    return cdf(dist, static_cast<value_type>(x));
 }
 template <class Distribution, class Realtype>
-inline typename Distribution::value_type logcdf(const Distribution& dist, const Realtype& x)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type logcdf(const Distribution& dist, const Realtype& x)
 {
    using std::log;
    using value_type = typename Distribution::value_type;
    return log(cdf(dist, static_cast<value_type>(x)));
 }
 template <class Distribution, class RealType>
-inline typename Distribution::value_type quantile(const Distribution& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type quantile(const Distribution& dist, const RealType& x)
 {
    typedef typename Distribution::value_type value_type;
    return quantile(dist, static_cast<value_type>(x));
@@ -144,14 +149,14 @@ inline typename Distribution::value_type chf(const Distribution& dist, const Rea
 }
 */
 template <class Distribution, class RealType>
-inline typename Distribution::value_type cdf(const complemented2_type<Distribution, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type cdf(const complemented2_type<Distribution, RealType>& c)
 {
    typedef typename Distribution::value_type value_type;
    return cdf(complement(c.dist, static_cast<value_type>(c.param)));
 }

 template <class Distribution, class RealType>
-inline typename Distribution::value_type logcdf(const complemented2_type<Distribution, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type logcdf(const complemented2_type<Distribution, RealType>& c)
 {
    using std::log;
    typedef typename Distribution::value_type value_type;
@@ -159,14 +164,14 @@ inline typename Distribution::value_type logcdf(const complemented2_type
 }

 template <class Distribution, class RealType>
-inline typename Distribution::value_type quantile(const complemented2_type<Distribution, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type quantile(const complemented2_type<Distribution, RealType>& c)
 {
    typedef typename Distribution::value_type value_type;
    return quantile(complement(c.dist, static_cast<value_type>(c.param)));
 }

 template <class Dist>
-inline typename Dist::value_type median(const Dist& d)
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type median(const Dist& d)
 { // median - default definition for those distributions for which a
   // simple closed form is not known,
   // and for which a domain_error and/or NaN generating function is NOT defined.
diff --git a/include/boost/math/distributions/detail/generic_mode.hpp b/include/boost/math/distributions/detail/generic_mode.hpp
index 19c8b2af01..9306c815da 100644
--- a/include/boost/math/distributions/detail/generic_mode.hpp
+++ b/include/boost/math/distributions/detail/generic_mode.hpp
@@ -8,19 +8,22 @@
 #ifndef BOOST_MATH_DISTRIBUTIONS_DETAIL_MODE_HPP
 #define BOOST_MATH_DISTRIBUTIONS_DETAIL_MODE_HPP

+#include
+#include
 #include // function minimization for mode
 #include
 #include
+#include

 namespace boost{ namespace math{ namespace detail{

 template <class Dist>
 struct pdf_minimizer
 {
-   pdf_minimizer(const Dist& d)
+   BOOST_MATH_GPU_ENABLED pdf_minimizer(const Dist& d)
      : dist(d) {}

-   typename Dist::value_type operator()(const typename Dist::value_type& x)
+   BOOST_MATH_GPU_ENABLED typename Dist::value_type operator()(const typename Dist::value_type& x)
   {
      return -pdf(dist, x);
   }
@@ -29,7 +32,7 @@ struct pdf_minimizer
 }

 template <class Dist>
-typename Dist::value_type generic_find_mode(const Dist& dist, typename Dist::value_type guess, const char* function, typename Dist::value_type step = 0)
+BOOST_MATH_GPU_ENABLED typename Dist::value_type generic_find_mode(const Dist& dist, typename Dist::value_type guess, const char* function, typename Dist::value_type step = 0)
 {
    BOOST_MATH_STD_USING
    typedef typename Dist::value_type value_type;
@@ -70,7 +73,7 @@ typename Dist::value_type generic_find_mode(const Dist& dist, typename Dist::val
       v = pdf(dist, lower_bound);
    }while(maxval < v);

-   std::uintmax_t max_iter = policies::get_max_root_iterations<policy_type>();
+   boost::math::uintmax_t max_iter = policies::get_max_root_iterations<policy_type>();

    value_type result = tools::brent_find_minima(
       pdf_minimizer<Dist>(dist),
@@ -90,7 +93,7 @@ typename Dist::value_type generic_find_mode(const Dist& dist, typename Dist::val
 //
 // As above,but confined to the interval [0,1]:
 //
 template <class Dist>
-typename Dist::value_type generic_find_mode_01(const Dist& dist, typename Dist::value_type guess, const char* function)
+BOOST_MATH_GPU_ENABLED typename Dist::value_type generic_find_mode_01(const Dist& dist, typename Dist::value_type guess, const char* function)
 {
    BOOST_MATH_STD_USING
    typedef typename Dist::value_type value_type;
@@ -121,7 +124,7 @@ typename Dist::value_type generic_find_mode_01(const Dist& dist, typename Dist::
       v = pdf(dist, lower_bound);
    }while(maxval < v);

-   std::uintmax_t max_iter = policies::get_max_root_iterations<policy_type>();
+   boost::math::uintmax_t max_iter = policies::get_max_root_iterations<policy_type>();

    value_type result = tools::brent_find_minima(
       pdf_minimizer<Dist>(dist),
diff --git a/include/boost/math/distributions/detail/generic_quantile.hpp b/include/boost/math/distributions/detail/generic_quantile.hpp
index 438ac952f0..917532566f 100644
--- a/include/boost/math/distributions/detail/generic_quantile.hpp
+++ b/include/boost/math/distributions/detail/generic_quantile.hpp
@@ -6,6 +6,10 @@
 #ifndef BOOST_MATH_DISTIBUTIONS_DETAIL_GENERIC_QUANTILE_HPP
 #define BOOST_MATH_DISTIBUTIONS_DETAIL_GENERIC_QUANTILE_HPP

+#include
+#include
+#include
+
 namespace boost{ namespace math{ namespace detail{

 template <class Dist>
@@ -14,10 +18,10 @@ struct generic_quantile_finder
    using value_type = typename Dist::value_type;
    using policy_type = typename Dist::policy_type;

-   generic_quantile_finder(const Dist& d, value_type t, bool c)
+   BOOST_MATH_GPU_ENABLED generic_quantile_finder(const Dist& d, value_type t, bool c)
      : dist(d), target(t), comp(c) {}

-   value_type operator()(const value_type& x)
+   BOOST_MATH_GPU_ENABLED value_type operator()(const value_type& x)
   {
      return comp
        ? value_type(target - cdf(complement(dist, x)))
@@ -31,7 +35,7 @@
 };

 template <class T, class Policy>
-inline T check_range_result(const T& x, const Policy& pol, const char* function)
+BOOST_MATH_GPU_ENABLED inline T check_range_result(const T& x, const Policy& pol, const char* function)
 {
    if((x >= 0) && (x < tools::min_value<T>()))
    {
@@ -49,7 +53,7 @@ inline T check_range_result(const T& x, const Policy& pol, const char* function)
 }

 template <class Dist>
-typename Dist::value_type generic_quantile(const Dist& dist, const typename Dist::value_type& p, const typename Dist::value_type& guess, bool comp, const char* function)
+BOOST_MATH_GPU_ENABLED typename Dist::value_type generic_quantile(const Dist& dist, const typename Dist::value_type& p, const typename Dist::value_type& guess, bool comp, const char* function)
 {
    using value_type = typename Dist::value_type;
    using policy_type = typename Dist::policy_type;
@@ -78,8 +82,8 @@ typename Dist::value_type generic_quantile(const Dist& dist, const typename Dist
    generic_quantile_finder<Dist> f(dist, p, comp);
    tools::eps_tolerance<value_type> tol(policies::digits<value_type, forwarding_policy>() - 3);
-   std::uintmax_t max_iter = policies::get_max_root_iterations<forwarding_policy>();
-   std::pair<value_type, value_type> ir = tools::bracket_and_solve_root(
+   boost::math::uintmax_t max_iter = policies::get_max_root_iterations<forwarding_policy>();
+   boost::math::pair<value_type, value_type> ir = tools::bracket_and_solve_root(
       f, guess, value_type(2), true, tol, max_iter, forwarding_policy());
    value_type result = ir.first + (ir.second - ir.first) / 2;
    if(max_iter >= policies::get_max_root_iterations<forwarding_policy>())
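generic_quantile inverts the CDF numerically: it wraps cdf(dist, x) - p (or the complemented form) in a function object, brackets and solves for the root, and returns the midpoint of the final bracket. The same idea in a stripped-down form, without the policy and error-handling plumbing of the real implementation:

    #include <boost/math/distributions/chi_squared.hpp>
    #include <boost/math/tools/toms748_solve.hpp>
    #include <cstdint>
    #include <iostream>

    int main()
    {
        boost::math::chi_squared_distribution<double> dist(5.0);
        const double p = 0.95;
        auto f = [&](double x) { return cdf(dist, x) - p; };
        std::uintmax_t max_iter = 200;
        boost::math::tools::eps_tolerance<double> tol(50);
        auto r = boost::math::tools::bracket_and_solve_root(
            f, 5.0 /*guess*/, 2.0 /*growth factor*/, true /*rising*/, tol, max_iter);
        // Midpoint of the bracket, about 11.07, matching quantile(dist, p):
        std::cout << (r.first + r.second) / 2 << '\n';
    }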
diff --git a/include/boost/math/distributions/detail/inv_discrete_quantile.hpp b/include/boost/math/distributions/detail/inv_discrete_quantile.hpp
index 739a866660..ac4a2b2318 100644
--- a/include/boost/math/distributions/detail/inv_discrete_quantile.hpp
+++ b/include/boost/math/distributions/detail/inv_discrete_quantile.hpp
@@ -6,7 +6,11 @@
 #ifndef BOOST_MATH_DISTRIBUTIONS_DETAIL_INV_DISCRETE_QUANTILE
 #define BOOST_MATH_DISTRIBUTIONS_DETAIL_INV_DISCRETE_QUANTILE

-#include
+#include
+#include
+#include
+#include
+#include

 namespace boost{ namespace math{ namespace detail{

@@ -19,10 +23,10 @@ struct distribution_quantile_finder
    typedef typename Dist::value_type value_type;
    typedef typename Dist::policy_type policy_type;

-   distribution_quantile_finder(const Dist d, value_type p, bool c)
+   BOOST_MATH_GPU_ENABLED distribution_quantile_finder(const Dist d, value_type p, bool c)
      : dist(d), target(p), comp(c) {}

-   value_type operator()(value_type const& x)
+   BOOST_MATH_GPU_ENABLED value_type operator()(value_type const& x)
   {
      return comp ? value_type(target - cdf(complement(dist, x))) : value_type(cdf(dist, x) - target);
   }
@@ -42,24 +46,24 @@ struct distribution_quantile_finder
 // in the root no longer being bracketed.
 //
 template <class Real, class Tol>
-void adjust_bounds(Real& /* a */, Real& /* b */, Tol const& /* tol */){}
+BOOST_MATH_GPU_ENABLED void adjust_bounds(Real& /* a */, Real& /* b */, Tol const& /* tol */){}

 template <class Real>
-void adjust_bounds(Real& /* a */, Real& b, tools::equal_floor const& /* tol */)
+BOOST_MATH_GPU_ENABLED void adjust_bounds(Real& /* a */, Real& b, tools::equal_floor const& /* tol */)
 {
    BOOST_MATH_STD_USING
    b -= tools::epsilon<Real>() * b;
 }

 template <class Real>
-void adjust_bounds(Real& a, Real& /* b */, tools::equal_ceil const& /* tol */)
+BOOST_MATH_GPU_ENABLED void adjust_bounds(Real& a, Real& /* b */, tools::equal_ceil const& /* tol */)
 {
    BOOST_MATH_STD_USING
    a += tools::epsilon<Real>() * a;
 }

 template <class Real>
-void adjust_bounds(Real& a, Real& b, tools::equal_nearest_integer const& /* tol */)
+BOOST_MATH_GPU_ENABLED void adjust_bounds(Real& a, Real& b, tools::equal_nearest_integer const& /* tol */)
 {
    BOOST_MATH_STD_USING
    a += tools::epsilon<Real>() * a;
@@ -69,7 +73,7 @@ void adjust_bounds(Real& a, Real& b, tools::equal_nearest_integer const& /* tol
 // This is where all the work is done:
 //
 template <class Dist, class Tolerance>
-typename Dist::value_type
+BOOST_MATH_GPU_ENABLED typename Dist::value_type
    do_inverse_discrete_quantile(
       const Dist& dist,
       const typename Dist::value_type& p,
@@ -78,12 +82,12 @@ typename Dist::value_type
       const typename Dist::value_type& multiplier,
       typename Dist::value_type adder,
       const Tolerance& tol,
-      std::uintmax_t& max_iter)
+      boost::math::uintmax_t& max_iter)
 {
    typedef typename Dist::value_type value_type;
    typedef typename Dist::policy_type policy_type;

-   static const char* function = "boost::math::do_inverse_discrete_quantile<%1%>";
+   constexpr auto function = "boost::math::do_inverse_discrete_quantile<%1%>";

    BOOST_MATH_STD_USING
@@ -100,7 +104,7 @@ typename Dist::value_type
       guess = min_bound;

    value_type fa = f(guess);
-   std::uintmax_t count = max_iter - 1;
+   boost::math::uintmax_t count = max_iter - 1;
    value_type fb(fa), a(guess), b =0; // Compiler warning C4701: potentially uninitialized local variable 'b' used
    if(fa == 0)
@@ -130,7 +134,7 @@ typename Dist::value_type
    else
    {
       b = a;
-      a = (std::max)(value_type(b - 1), value_type(0));
+      a = BOOST_MATH_GPU_SAFE_MAX(value_type(b - 1), value_type(0));
       if(a < min_bound)
          a = min_bound;
       fa = f(a);
@@ -153,7 +157,7 @@ typename Dist::value_type
    // If we're looking for a large result, then bump "adder" up
    // by a bit to increase our chances of bracketing the root:
    //
-   //adder = (std::max)(adder, 0.001f * guess);
+   //adder = BOOST_MATH_GPU_SAFE_MAX(adder, 0.001f * guess);
    if(fa < 0)
    {
       b = a + adder;
@@ -162,7 +166,7 @@ typename Dist::value_type
    }
    else
    {
-      b = (std::max)(value_type(a - adder), value_type(0));
+      b = BOOST_MATH_GPU_SAFE_MAX(value_type(a - adder), value_type(0));
      if(b < min_bound)
         b = min_bound;
    }
@@ -186,7 +190,7 @@ typename Dist::value_type
    }
    else
    {
-      b = (std::max)(value_type(a - adder), value_type(0));
+      b = BOOST_MATH_GPU_SAFE_MAX(value_type(a - adder), value_type(0));
      if(b < min_bound)
         b = min_bound;
    }
@@ -195,9 +199,8 @@ typename Dist::value_type
    }
    if(a > b)
    {
-      using std::swap;
-      swap(a, b);
-      swap(fa, fb);
+      BOOST_MATH_GPU_SAFE_SWAP(a, b);
+      BOOST_MATH_GPU_SAFE_SWAP(fa, fb);
    }
 }
 //
@@ -274,7 +277,7 @@ typename Dist::value_type
 //
 // Go ahead and find the root:
 //
-   std::pair<value_type, value_type> r = toms748_solve(f, a, b, fa, fb, tol, count, policy_type());
+   boost::math::pair<value_type, value_type> r = toms748_solve(f, a, b, fa, fb, tol, count, policy_type());
    max_iter += count;
    if (max_iter >= policies::get_max_root_iterations<policy_type>())
    {
@@ -293,7 +296,7 @@ typename Dist::value_type
 // is very close 1.
 //
 template <class Dist>
-inline typename Dist::value_type round_to_floor(const Dist& d, typename Dist::value_type result, typename Dist::value_type p, bool c)
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type round_to_floor(const Dist& d, typename Dist::value_type result, typename Dist::value_type p, bool c)
 {
    BOOST_MATH_STD_USING
    typename Dist::value_type cc = ceil(result);
@@ -307,7 +310,11 @@ inline typename Dist::value_type round_to_floor(const Dist& d, typename Dist::va
    //
    while(result != 0)
    {
+      #ifdef BOOST_MATH_HAS_GPU_SUPPORT
+      cc = floor(::nextafter(result, -tools::max_value<typename Dist::value_type>()));
+      #else
      cc = floor(float_prior(result));
+      #endif
      if(cc < support(d).first)
         break;
      pp = c ? cdf(complement(d, cc)) : cdf(d, cc);
@@ -325,7 +332,7 @@ inline typename Dist::value_type round_to_floor(const Dist& d, typename Dist::va
 #endif

 template <class Dist>
-inline typename Dist::value_type round_to_ceil(const Dist& d, typename Dist::value_type result, typename Dist::value_type p, bool c)
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type round_to_ceil(const Dist& d, typename Dist::value_type result, typename Dist::value_type p, bool c)
 {
    BOOST_MATH_STD_USING
    typename Dist::value_type cc = floor(result);
@@ -339,7 +346,11 @@ inline typename Dist::value_type round_to_ceil(const Dist& d, typename Dist::val
    //
    while(true)
    {
+      #ifdef BOOST_MATH_HAS_GPU_SUPPORT
+      cc = ceil(::nextafter(result, tools::max_value<typename Dist::value_type>()));
+      #else
      cc = ceil(float_next(result));
+      #endif
      if(cc > support(d).second)
         break;
      pp = c ? cdf(complement(d, cc)) : cdf(d, cc);
@@ -362,7 +373,7 @@ inline typename Dist::value_type round_to_ceil(const Dist& d, typename Dist::val
 // to an int where required.
 //
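round_to_floor and round_to_ceil implement the final step of the discrete-quantile machinery: nudging the continuous root to the integer demanded by the selected discrete_quantile policy (real, integer_round_outwards which is the documented default, integer_round_inwards, integer_round_down, integer_round_up, or integer_round_nearest). Selecting a policy at the distribution level looks like this:

    #include <boost/math/distributions/binomial.hpp>
    #include <boost/math/policies/policy.hpp>
    #include <iostream>

    int main()
    {
        using namespace boost::math;
        using namespace boost::math::policies;

        // Ask for the real-valued (un-rounded) quantile:
        binomial_distribution<double, policy<discrete_quantile<real>>> d1(50, 0.5);
        std::cout << quantile(d1, 0.05) << '\n'; // fractional, close to 19

        // The default policy rounds outwards, giving a whole number:
        binomial_distribution<double> d2(50, 0.5);
        std::cout << quantile(d2, 0.05) << '\n'; // 19
    }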
 template <class Dist>
-inline typename Dist::value_type
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type
    inverse_discrete_quantile(
       const Dist& dist,
       typename Dist::value_type p,
@@ -371,7 +382,7 @@ inline typename Dist::value_type
       const typename Dist::value_type& multiplier,
       const typename Dist::value_type& adder,
       const policies::discrete_quantile<policies::real>&,
-      std::uintmax_t& max_iter)
+      boost::math::uintmax_t& max_iter)
 {
    if(p > 0.5)
    {
@@ -393,7 +404,7 @@ inline typename Dist::value_type
 }

 template <class Dist>
-inline typename Dist::value_type
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type
    inverse_discrete_quantile(
       const Dist& dist,
       const typename Dist::value_type& p,
@@ -402,7 +413,7 @@ inline typename Dist::value_type
       const typename Dist::value_type& multiplier,
       const typename Dist::value_type& adder,
       const policies::discrete_quantile<policies::integer_round_outwards>&,
-      std::uintmax_t& max_iter)
+      boost::math::uintmax_t& max_iter)
 {
    typedef typename Dist::value_type value_type;
    BOOST_MATH_STD_USING
@@ -436,7 +447,7 @@ inline typename Dist::value_type
 }

 template <class Dist>
-inline typename Dist::value_type
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type
    inverse_discrete_quantile(
       const Dist& dist,
       const typename Dist::value_type& p,
@@ -445,7 +456,7 @@ inline typename Dist::value_type
       const typename Dist::value_type& multiplier,
       const typename Dist::value_type& adder,
       const policies::discrete_quantile<policies::integer_round_inwards>&,
-      std::uintmax_t& max_iter)
+      boost::math::uintmax_t& max_iter)
 {
    typedef typename Dist::value_type value_type;
    BOOST_MATH_STD_USING
@@ -479,7 +490,7 @@ inline typename Dist::value_type
 }

 template <class Dist>
-inline typename Dist::value_type
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type
    inverse_discrete_quantile(
       const Dist& dist,
       const typename Dist::value_type& p,
@@ -488,7 +499,7 @@ inline typename Dist::value_type
       const typename Dist::value_type& multiplier,
       const typename Dist::value_type& adder,
       const policies::discrete_quantile<policies::integer_round_down>&,
-      std::uintmax_t& max_iter)
+      boost::math::uintmax_t& max_iter)
 {
    typedef typename Dist::value_type value_type;
    BOOST_MATH_STD_USING
@@ -507,7 +518,7 @@ inline typename Dist::value_type
 }

 template <class Dist>
-inline typename Dist::value_type
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type
    inverse_discrete_quantile(
       const Dist& dist,
       const typename Dist::value_type& p,
@@ -516,7 +527,7 @@ inline typename Dist::value_type
       const typename Dist::value_type& multiplier,
       const typename Dist::value_type& adder,
       const policies::discrete_quantile<policies::integer_round_up>&,
-      std::uintmax_t& max_iter)
+      boost::math::uintmax_t& max_iter)
 {
    BOOST_MATH_STD_USING
    typename Dist::value_type pp = c ? 1 - p : p;
@@ -534,7 +545,7 @@ inline typename Dist::value_type
 }

 template <class Dist>
-inline typename Dist::value_type
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type
    inverse_discrete_quantile(
       const Dist& dist,
       const typename Dist::value_type& p,
@@ -543,7 +554,7 @@ inline typename Dist::value_type
       const typename Dist::value_type& multiplier,
       const typename Dist::value_type& adder,
       const policies::discrete_quantile<policies::integer_round_nearest>&,
-      std::uintmax_t& max_iter)
+      boost::math::uintmax_t& max_iter)
 {
    typedef typename Dist::value_type value_type;
    BOOST_MATH_STD_USING
diff --git a/include/boost/math/distributions/exponential.hpp b/include/boost/math/distributions/exponential.hpp
index 164e01f205..9d45ac4933 100644
--- a/include/boost/math/distributions/exponential.hpp
+++ b/include/boost/math/distributions/exponential.hpp
@@ -1,4 +1,5 @@
 // Copyright John Maddock 2006.
+// Copyright Matt Borland 2024
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -6,12 +7,16 @@
 #ifndef BOOST_STATS_EXPONENTIAL_HPP
 #define BOOST_STATS_EXPONENTIAL_HPP

-#include
+#include
+#include
+#include
 #include
 #include
 #include
 #include
 #include
+#include
+#include

 #ifdef _MSC_VER
 # pragma warning(push)
@@ -19,8 +24,11 @@
 # pragma warning(disable: 4702) // unreachable code (return after domain_error throw).
 #endif

+#ifndef BOOST_MATH_HAS_NVRTC
+#include
 #include
 #include
+#endif

 namespace boost{ namespace math{

@@ -29,7 +37,7 @@ namespace detail{
 // Error check:
 //
 template <class RealType, class Policy>
-inline bool verify_lambda(const char* function, RealType l, RealType* presult, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline bool verify_lambda(const char* function, RealType l, RealType* presult, const Policy& pol)
 {
    if((l <= 0) || !(boost::math::isfinite)(l))
    {
@@ -42,7 +50,7 @@ inline bool verify_lambda(const char* function, RealType l, RealType* presult, c
 }

 template <class RealType, class Policy>
-inline bool verify_exp_x(const char* function, RealType x, RealType* presult, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline bool verify_exp_x(const char* function, RealType x, RealType* presult, const Policy& pol)
 {
    if((x < 0) || (boost::math::isnan)(x))
    {
@@ -63,14 +71,14 @@ class exponential_distribution
    using value_type = RealType;
    using policy_type = Policy;

-   explicit exponential_distribution(RealType l_lambda = 1)
+   BOOST_MATH_GPU_ENABLED explicit exponential_distribution(RealType l_lambda = 1)
      : m_lambda(l_lambda)
   {
      RealType err;
      detail::verify_lambda("boost::math::exponential_distribution<%1%>::exponential_distribution", l_lambda, &err, Policy());
   } // exponential_distribution

-   RealType lambda()const { return m_lambda; }
+   BOOST_MATH_GPU_ENABLED RealType lambda()const { return m_lambda; }

 private:
    RealType m_lambda;
@@ -84,35 +92,35 @@ exponential_distribution(RealType)->exponential_distribution

 template <class RealType, class Policy>
-inline std::pair<RealType, RealType> range(const exponential_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> range(const exponential_distribution<RealType, Policy>& /*dist*/)
 { // Range of permissible values for random variable x.
-  if (std::numeric_limits<RealType>::has_infinity)
+  BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
  {
-    return std::pair<RealType, RealType>(static_cast<RealType>(0), std::numeric_limits<RealType>::infinity()); // 0 to + infinity.
+    return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), boost::math::numeric_limits<RealType>::infinity()); // 0 to + infinity.
  }
  else
  {
    using boost::math::tools::max_value;
-    return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // 0 to + max
+    return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // 0 to + max
  }
 }

 template <class RealType, class Policy>
-inline std::pair<RealType, RealType> support(const exponential_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> support(const exponential_distribution<RealType, Policy>& /*dist*/)
 { // Range of supported values for random variable x.
   // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
   using boost::math::tools::max_value;
   using boost::math::tools::min_value;
-  return std::pair<RealType, RealType>(min_value<RealType>(), max_value<RealType>());
+  return boost::math::pair<RealType, RealType>(min_value<RealType>(), max_value<RealType>()); // min_value<RealType>() to avoid a discontinuity at x = 0.
 }
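For reference, the functions that follow implement pdf(x) = lambda * exp(-lambda * x) and cdf(x) = 1 - exp(-lambda * x) for x >= 0. Two quick spot checks:

    #include <boost/math/distributions/exponential.hpp>
    #include <cassert>
    #include <cmath>

    int main()
    {
        boost::math::exponential_distribution<double> dist(3.0);
        assert(std::abs(pdf(dist, 0.0) - 3.0) < 1e-12);                    // pdf(0) = lambda
        assert(std::abs(cdf(dist, 1.0) - (1.0 - std::exp(-3.0))) < 1e-12); // 1 - exp(-lambda*x)
    }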
 template <class RealType, class Policy>
-inline RealType pdf(const exponential_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const exponential_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::pdf(const exponential_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::pdf(const exponential_distribution<%1%>&, %1%)";

    RealType lambda = dist.lambda();
    RealType result = 0;
@@ -128,14 +136,14 @@ inline RealType pdf(const exponential_distribution<RealType, Policy>& dist, cons
 } // pdf

 template <class RealType, class Policy>
-inline RealType logpdf(const exponential_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType logpdf(const exponential_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::logpdf(const exponential_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::logpdf(const exponential_distribution<%1%>&, %1%)";

    RealType lambda = dist.lambda();
-   RealType result = -std::numeric_limits<RealType>::infinity();
+   RealType result = -boost::math::numeric_limits<RealType>::infinity();
    if(0 == detail::verify_lambda(function, lambda, &result, Policy()))
       return result;
    if(0 == detail::verify_exp_x(function, x, &result, Policy()))
@@ -146,11 +154,11 @@ inline RealType logpdf(const exponential_distribution<RealType, Policy>& dist, c
 } // logpdf

 template <class RealType, class Policy>
-inline RealType cdf(const exponential_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const exponential_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::cdf(const exponential_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(const exponential_distribution<%1%>&, %1%)";

    RealType result = 0;
    RealType lambda = dist.lambda();
@@ -164,11 +172,11 @@ inline RealType cdf(const exponential_distribution<RealType, Policy>& dist, cons
 } // cdf

 template <class RealType, class Policy>
-inline RealType logcdf(const exponential_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType logcdf(const exponential_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::logcdf(const exponential_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::logcdf(const exponential_distribution<%1%>&, %1%)";

    RealType result = 0;
    RealType lambda = dist.lambda();
@@ -182,11 +190,11 @@ inline RealType logcdf(const exponential_distribution<RealType, Policy>& dist, c
 } // cdf

 template <class RealType, class Policy>
-inline RealType quantile(const exponential_distribution<RealType, Policy>& dist, const RealType& p)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const exponential_distribution<RealType, Policy>& dist, const RealType& p)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::quantile(const exponential_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const exponential_distribution<%1%>&, %1%)";

    RealType result = 0;
    RealType lambda = dist.lambda();
@@ -205,11 +213,11 @@ inline RealType quantile(const exponential_distribution<RealType, Policy>& dist,
 } // quantile
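Inverting the CDF gives Q(p) = -log(1 - p) / lambda, and the complemented quantile below computes Q from the upper-tail probability q directly as -log(q) / lambda, avoiding the formation of 1 - q. A quick consistency check:

    #include <boost/math/distributions/exponential.hpp>
    #include <cassert>
    #include <cmath>

    int main()
    {
        boost::math::exponential_distribution<double> dist(2.0);
        const double p = 0.3;
        // Q(p) = -log1p(-p) / lambda:
        assert(std::abs(quantile(dist, p) + std::log1p(-p) / 2.0) < 1e-12);
        // Complemented form agrees with the direct form:
        assert(std::abs(quantile(complement(dist, 1.0 - p)) - quantile(dist, p)) < 1e-9);
    }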
RealType result = 0; RealType lambda = c.dist.lambda(); @@ -226,11 +234,11 @@ inline RealType cdf(const complemented2_type -inline RealType logcdf(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType logcdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::logcdf(const exponential_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::logcdf(const exponential_distribution<%1%>&, %1%)"; RealType result = 0; RealType lambda = c.dist.lambda(); @@ -247,11 +255,11 @@ inline RealType logcdf(const complemented2_type -inline RealType quantile(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const exponential_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const exponential_distribution<%1%>&, %1%)"; RealType result = 0; RealType lambda = c.dist.lambda(); @@ -272,7 +280,7 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const exponential_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mean(const exponential_distribution& dist) { RealType result = 0; RealType lambda = dist.lambda(); @@ -282,7 +290,7 @@ inline RealType mean(const exponential_distribution& dist) } template -inline RealType standard_deviation(const exponential_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType standard_deviation(const exponential_distribution& dist) { RealType result = 0; RealType lambda = dist.lambda(); @@ -292,38 +300,38 @@ inline RealType standard_deviation(const exponential_distribution -inline RealType mode(const exponential_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline RealType mode(const exponential_distribution& /*dist*/) { return 0; } template -inline RealType median(const exponential_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType median(const exponential_distribution& dist) { using boost::math::constants::ln_two; return ln_two() / dist.lambda(); // ln(2) / lambda } template -inline RealType skewness(const exponential_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline RealType skewness(const exponential_distribution& /*dist*/) { return 2; } template -inline RealType kurtosis(const exponential_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const exponential_distribution& /*dist*/) { return 9; } template -inline RealType kurtosis_excess(const exponential_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const exponential_distribution& /*dist*/) { return 6; } template -inline RealType entropy(const exponential_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType entropy(const exponential_distribution& dist) { using std::log; return 1 - log(dist.lambda()); diff --git a/include/boost/math/distributions/extreme_value.hpp b/include/boost/math/distributions/extreme_value.hpp index 1bde2743c0..73454d29d4 100644 --- a/include/boost/math/distributions/extreme_value.hpp +++ b/include/boost/math/distributions/extreme_value.hpp @@ -1,4 +1,5 @@ // Copyright John Maddock 2006. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
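For reference, a minimal host-side sketch of the exponential_distribution API touched by the hunks above; this is not part of the patch, and the parameter values are arbitrary:

#include <boost/math/distributions/exponential.hpp>
#include <iostream>

int main()
{
    boost::math::exponential_distribution<double> dist(0.5); // rate lambda = 0.5

    std::cout << boost::math::pdf(dist, 1.0) << '\n';      // 0.5 * exp(-0.5)
    std::cout << boost::math::cdf(dist, 1.0) << '\n';      // 1 - exp(-0.5)
    std::cout << boost::math::quantile(dist, 0.5) << '\n'; // median = ln(2) / lambda
    std::cout << boost::math::mean(dist) << '\n';          // 1 / lambda = 2
}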
diff --git a/include/boost/math/distributions/extreme_value.hpp b/include/boost/math/distributions/extreme_value.hpp
index 1bde2743c0..73454d29d4 100644
--- a/include/boost/math/distributions/extreme_value.hpp
+++ b/include/boost/math/distributions/extreme_value.hpp
@@ -1,4 +1,5 @@
 // Copyright John Maddock 2006.
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -6,12 +7,17 @@
 #ifndef BOOST_STATS_EXTREME_VALUE_HPP
 #define BOOST_STATS_EXTREME_VALUE_HPP

-#include
+#include
+#include
+#include
+#include
 #include
 #include
 #include
 #include
 #include
+#include
+#include

 //
 // This is the maximum extreme value distribution, see
@@ -20,8 +26,11 @@
 // Also known as a Fisher-Tippett distribution, a log-Weibull
 // distribution or a Gumbel distribution.

+#ifndef BOOST_MATH_HAS_NVRTC
+#include
 #include
 #include
+#endif

 #ifdef _MSC_VER
 # pragma warning(push)
@@ -35,7 +44,7 @@ namespace detail{
 // Error check:
 //
 template <class RealType, class Policy>
-inline bool verify_scale_b(const char* function, RealType b, RealType* presult, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline bool verify_scale_b(const char* function, RealType b, RealType* presult, const Policy& pol)
 {
    if((b <= 0) || !(boost::math::isfinite)(b))
    {
@@ -56,7 +65,7 @@ class extreme_value_distribution
    using value_type = RealType;
    using policy_type = Policy;

-   explicit extreme_value_distribution(RealType a = 0, RealType b = 1)
+   BOOST_MATH_GPU_ENABLED explicit extreme_value_distribution(RealType a = 0, RealType b = 1)
       : m_a(a), m_b(b)
    {
       RealType err;
@@ -64,8 +73,8 @@ class extreme_value_distribution
       detail::check_finite("boost::math::extreme_value_distribution<%1%>::extreme_value_distribution", a, &err, Policy());
    } // extreme_value_distribution

-   RealType location()const { return m_a; }
-   RealType scale()const { return m_b; }
+   BOOST_MATH_GPU_ENABLED RealType location()const { return m_a; }
+   BOOST_MATH_GPU_ENABLED RealType scale()const { return m_b; }

 private:
    RealType m_a;
@@ -82,28 +91,28 @@ extreme_value_distribution(RealType,RealType)->extreme_value_distribution<typename boost::math::tools::promote_args<RealType>::type>;

-inline std::pair<RealType, RealType> range(const extreme_value_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> range(const extreme_value_distribution<RealType, Policy>& /*dist*/)
 { // Range of permissible values for random variable x.
    using boost::math::tools::max_value;
-   return std::pair<RealType, RealType>(
-      std::numeric_limits<RealType>::has_infinity ? -std::numeric_limits<RealType>::infinity() : -max_value<RealType>(),
-      std::numeric_limits<RealType>::has_infinity ? std::numeric_limits<RealType>::infinity() : max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(
+      boost::math::numeric_limits<RealType>::has_infinity ? -boost::math::numeric_limits<RealType>::infinity() : -max_value<RealType>(),
+      boost::math::numeric_limits<RealType>::has_infinity ? boost::math::numeric_limits<RealType>::infinity() : max_value<RealType>());
 }

 template <class RealType, class Policy>
-inline std::pair<RealType, RealType> support(const extreme_value_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> support(const extreme_value_distribution<RealType, Policy>& /*dist*/)
 { // Range of supported values for random variable x.
    // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
    using boost::math::tools::max_value;
-   return std::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>());
 }

 template <class RealType, class Policy>
-inline RealType pdf(const extreme_value_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const extreme_value_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::pdf(const extreme_value_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::pdf(const extreme_value_distribution<%1%>&, %1%)";

    RealType a = dist.location();
    RealType b = dist.scale();
@@ -124,15 +133,15 @@ inline RealType pdf(const extreme_value_distribution& dist, co
 } // pdf

 template <class RealType, class Policy>
-inline RealType logpdf(const extreme_value_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType logpdf(const extreme_value_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::logpdf(const extreme_value_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::logpdf(const extreme_value_distribution<%1%>&, %1%)";

    RealType a = dist.location();
    RealType b = dist.scale();
-   RealType result = -std::numeric_limits<RealType>::infinity();
+   RealType result = -boost::math::numeric_limits<RealType>::infinity();

    if(0 == detail::verify_scale_b(function, b, &result, Policy()))
       return result;
    if(0 == detail::check_finite(function, a, &result, Policy()))
@@ -149,11 +158,11 @@ inline RealType logpdf(const extreme_value_distribution& dist,
 } // logpdf

 template <class RealType, class Policy>
-inline RealType cdf(const extreme_value_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const extreme_value_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::cdf(const extreme_value_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(const extreme_value_distribution<%1%>&, %1%)";

    if((boost::math::isinf)(x))
       return x < 0 ? 0.0f : 1.0f;
@@ -175,11 +184,11 @@ inline RealType cdf(const extreme_value_distribution& dist, co
 } // cdf

 template <class RealType, class Policy>
-inline RealType logcdf(const extreme_value_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType logcdf(const extreme_value_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::logcdf(const extreme_value_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::logcdf(const extreme_value_distribution<%1%>&, %1%)";

    if((boost::math::isinf)(x))
       return x < 0 ? 0.0f : 1.0f;
@@ -201,11 +210,11 @@ inline RealType logcdf(const extreme_value_distribution& dist,
 } // logcdf

 template <class RealType, class Policy>
-RealType quantile(const extreme_value_distribution<RealType, Policy>& dist, const RealType& p)
+BOOST_MATH_GPU_ENABLED RealType quantile(const extreme_value_distribution<RealType, Policy>& dist, const RealType& p)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::quantile(const extreme_value_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const extreme_value_distribution<%1%>&, %1%)";

    RealType a = dist.location();
    RealType b = dist.scale();
@@ -228,11 +237,11 @@ RealType quantile(const extreme_value_distribution& dist, cons
 } // quantile

 template <class RealType, class Policy>
-inline RealType cdf(const complemented2_type<extreme_value_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<extreme_value_distribution<RealType, Policy>, RealType>& c)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::cdf(const extreme_value_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(const extreme_value_distribution<%1%>&, %1%)";

    if((boost::math::isinf)(c.param))
       return c.param < 0 ? 1.0f : 0.0f;
@@ -252,11 +261,11 @@ inline RealType cdf(const complemented2_type

-inline RealType logcdf(const complemented2_type<extreme_value_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType logcdf(const complemented2_type<extreme_value_distribution<RealType, Policy>, RealType>& c)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::logcdf(const extreme_value_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::logcdf(const extreme_value_distribution<%1%>&, %1%)";

    if((boost::math::isinf)(c.param))
       return c.param < 0 ? 1.0f : 0.0f;
@@ -276,11 +285,11 @@ inline RealType logcdf(const complemented2_type

-RealType quantile(const complemented2_type<extreme_value_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED RealType quantile(const complemented2_type<extreme_value_distribution<RealType, Policy>, RealType>& c)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::quantile(const extreme_value_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const extreme_value_distribution<%1%>&, %1%)";

    RealType a = c.dist.location();
    RealType b = c.dist.scale();
@@ -304,7 +313,7 @@ RealType quantile(const complemented2_type

-inline RealType mean(const extreme_value_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mean(const extreme_value_distribution<RealType, Policy>& dist)
 {
    RealType a = dist.location();
    RealType b = dist.scale();
@@ -317,7 +326,7 @@ inline RealType mean(const extreme_value_distribution& dist)
 }

 template <class RealType, class Policy>
-inline RealType standard_deviation(const extreme_value_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType standard_deviation(const extreme_value_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING // for ADL of std functions.
@@ -331,20 +340,20 @@ inline RealType standard_deviation(const extreme_value_distribution

-inline RealType mode(const extreme_value_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mode(const extreme_value_distribution<RealType, Policy>& dist)
 {
    return dist.location();
 }

 template <class RealType, class Policy>
-inline RealType median(const extreme_value_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType median(const extreme_value_distribution<RealType, Policy>& dist)
 {
   using constants::ln_ln_two;
   return dist.location() - dist.scale() * ln_ln_two<RealType>();
 }

 template <class RealType, class Policy>
-inline RealType skewness(const extreme_value_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const extreme_value_distribution<RealType, Policy>& /*dist*/)
 {
   //
   // This is 12 * sqrt(6) * zeta(3) / pi^3:
@@ -354,14 +363,14 @@ inline RealType skewness(const extreme_value_distribution& /*d
 }

 template <class RealType, class Policy>
-inline RealType kurtosis(const extreme_value_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const extreme_value_distribution<RealType, Policy>& /*dist*/)
 {
   // See http://mathworld.wolfram.com/ExtremeValueDistribution.html
   return RealType(27) / 5;
 }

 template <class RealType, class Policy>
-inline RealType kurtosis_excess(const extreme_value_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const extreme_value_distribution<RealType, Policy>& /*dist*/)
 {
   // See http://mathworld.wolfram.com/ExtremeValueDistribution.html
   return RealType(12) / 5;
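As a sanity check on the median() hunk above, a small sketch (not part of the patch; a and b are arbitrary) comparing the accessor against the closed form a - b * ln(ln 2):

#include <boost/math/distributions/extreme_value.hpp>
#include <cmath>
#include <iostream>

int main()
{
    const double a = 1.5, b = 2.0; // location, scale
    boost::math::extreme_value_distribution<double> dist(a, b);

    std::cout << boost::math::median(dist) << '\n';
    std::cout << a - b * std::log(std::log(2.0)) << '\n'; // same value
}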
diff --git a/include/boost/math/distributions/fisher_f.hpp b/include/boost/math/distributions/fisher_f.hpp
index e22cdf50ae..56b288d88e 100644
--- a/include/boost/math/distributions/fisher_f.hpp
+++ b/include/boost/math/distributions/fisher_f.hpp
@@ -1,5 +1,5 @@
 // Copyright John Maddock 2006.
-
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -8,14 +8,15 @@
 #ifndef BOOST_MATH_DISTRIBUTIONS_FISHER_F_HPP
 #define BOOST_MATH_DISTRIBUTIONS_FISHER_F_HPP

+#include
+#include
+#include
 #include
 #include // for incomplete beta.
 #include // complements
 #include // error checks
 #include

-#include
-
 namespace boost{ namespace math{

 template <class RealType = double, class Policy = policies::policy<> >
@@ -25,9 +26,9 @@ class fisher_f_distribution
    typedef RealType value_type;
    typedef Policy policy_type;

-   fisher_f_distribution(const RealType& i, const RealType& j) : m_df1(i), m_df2(j)
+   BOOST_MATH_GPU_ENABLED fisher_f_distribution(const RealType& i, const RealType& j) : m_df1(i), m_df2(j)
    {
-      static const char* function = "fisher_f_distribution<%1%>::fisher_f_distribution";
+      constexpr auto function = "fisher_f_distribution<%1%>::fisher_f_distribution";
      RealType result;
      detail::check_df(
         function, m_df1, &result, Policy());
@@ -35,11 +36,11 @@ class fisher_f_distribution
         function, m_df2, &result, Policy());
    } // fisher_f_distribution

-   RealType degrees_of_freedom1()const
+   BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom1()const
    {
       return m_df1;
    }
-   RealType degrees_of_freedom2()const
+   BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom2()const
    {
       return m_df2;
    }
@@ -60,29 +61,29 @@ fisher_f_distribution(RealType,RealType)->fisher_f_distribution<typename boost::math::tools::promote_args<RealType>::type>;

-inline const std::pair<RealType, RealType> range(const fisher_f_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const fisher_f_distribution<RealType, Policy>& /*dist*/)
 { // Range of permissible values for random variable x.
    using boost::math::tools::max_value;
-   return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
 }

 template <class RealType, class Policy>
-inline const std::pair<RealType, RealType> support(const fisher_f_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const fisher_f_distribution<RealType, Policy>& /*dist*/)
 { // Range of supported values for random variable x.
    // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
    using boost::math::tools::max_value;
-   return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
 }

 template <class RealType, class Policy>
-RealType pdf(const fisher_f_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED RealType pdf(const fisher_f_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING  // for ADL of std functions
    RealType df1 = dist.degrees_of_freedom1();
    RealType df2 = dist.degrees_of_freedom2();
    // Error check:
    RealType error_result = 0;
-   static const char* function = "boost::math::pdf(fisher_f_distribution<%1%> const&, %1%)";
+   constexpr auto function = "boost::math::pdf(fisher_f_distribution<%1%> const&, %1%)";
    if(false == (detail::check_df(
         function, df1, &error_result, Policy())
         && detail::check_df(
@@ -132,9 +133,9 @@ RealType pdf(const fisher_f_distribution& dist, const RealType
 } // pdf

 template <class RealType, class Policy>
-inline RealType cdf(const fisher_f_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const fisher_f_distribution<RealType, Policy>& dist, const RealType& x)
 {
-   static const char* function = "boost::math::cdf(fisher_f_distribution<%1%> const&, %1%)";
+   constexpr auto function = "boost::math::cdf(fisher_f_distribution<%1%> const&, %1%)";
    RealType df1 = dist.degrees_of_freedom1();
    RealType df2 = dist.degrees_of_freedom2();
    // Error check:
@@ -167,9 +168,9 @@ inline RealType cdf(const fisher_f_distribution& dist, const R
 } // cdf

 template <class RealType, class Policy>
-inline RealType quantile(const fisher_f_distribution<RealType, Policy>& dist, const RealType& p)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const fisher_f_distribution<RealType, Policy>& dist, const RealType& p)
 {
-   static const char* function = "boost::math::quantile(fisher_f_distribution<%1%> const&, %1%)";
+   constexpr auto function = "boost::math::quantile(fisher_f_distribution<%1%> const&, %1%)";
    RealType df1 = dist.degrees_of_freedom1();
    RealType df2 = dist.degrees_of_freedom2();
    // Error check:
@@ -192,9 +193,9 @@ inline RealType quantile(const fisher_f_distribution& dist, co
 } // quantile

 template <class RealType, class Policy>
-inline RealType cdf(const complemented2_type<fisher_f_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<fisher_f_distribution<RealType, Policy>, RealType>& c)
 {
-   static const char* function = "boost::math::cdf(fisher_f_distribution<%1%> const&, %1%)";
+   constexpr auto function = "boost::math::cdf(fisher_f_distribution<%1%> const&, %1%)";
    RealType df1 = c.dist.degrees_of_freedom1();
    RealType df2 = c.dist.degrees_of_freedom2();
    RealType x = c.param;
@@ -228,9 +229,9 @@ inline RealType cdf(const complemented2_type

-inline RealType quantile(const complemented2_type<fisher_f_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<fisher_f_distribution<RealType, Policy>, RealType>& c)
 {
-   static const char* function = "boost::math::quantile(fisher_f_distribution<%1%> const&, %1%)";
+   constexpr auto function = "boost::math::quantile(fisher_f_distribution<%1%> const&, %1%)";
    RealType df1 = c.dist.degrees_of_freedom1();
    RealType df2 = c.dist.degrees_of_freedom2();
    RealType p = c.param;
@@ -252,9 +253,9 @@ inline RealType quantile(const complemented2_type

-inline RealType mean(const fisher_f_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mean(const fisher_f_distribution<RealType, Policy>& dist)
 { // Mean of F distribution = v.
-   static const char* function = "boost::math::mean(fisher_f_distribution<%1%> const&)";
+   constexpr auto function = "boost::math::mean(fisher_f_distribution<%1%> const&)";
    RealType df1 = dist.degrees_of_freedom1();
    RealType df2 = dist.degrees_of_freedom2();
    // Error check:
@@ -273,9 +274,9 @@ inline RealType mean(const fisher_f_distribution& dist)
 } // mean

 template <class RealType, class Policy>
-inline RealType variance(const fisher_f_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType variance(const fisher_f_distribution<RealType, Policy>& dist)
 { // Variance of F distribution.
-   static const char* function = "boost::math::variance(fisher_f_distribution<%1%> const&)";
+   constexpr auto function = "boost::math::variance(fisher_f_distribution<%1%> const&)";
    RealType df1 = dist.degrees_of_freedom1();
    RealType df2 = dist.degrees_of_freedom2();
    // Error check:
@@ -294,9 +295,9 @@ inline RealType variance(const fisher_f_distribution& dist)
 } // variance

 template <class RealType, class Policy>
-inline RealType mode(const fisher_f_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mode(const fisher_f_distribution<RealType, Policy>& dist)
 {
-   static const char* function = "boost::math::mode(fisher_f_distribution<%1%> const&)";
+   constexpr auto function = "boost::math::mode(fisher_f_distribution<%1%> const&)";
    RealType df1 = dist.degrees_of_freedom1();
    RealType df2 = dist.degrees_of_freedom2();
    // Error check:
@@ -317,15 +318,15 @@ inline RealType mode(const fisher_f_distribution& dist)

 //template <class RealType, class Policy>
 //inline RealType median(const fisher_f_distribution<RealType, Policy>& dist)
 //{ // Median of Fisher F distribution is not defined.
-//  return tools::domain_error<RealType>(BOOST_CURRENT_FUNCTION, "Median is not implemented, result is %1%!", std::numeric_limits<RealType>::quiet_NaN());
+//  return tools::domain_error<RealType>(BOOST_CURRENT_FUNCTION, "Median is not implemented, result is %1%!", boost::math::numeric_limits<RealType>::quiet_NaN());
 // } // median
 // Now implemented via quantile(half) in derived accessors.

 template <class RealType, class Policy>
-inline RealType skewness(const fisher_f_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const fisher_f_distribution<RealType, Policy>& dist)
 {
-   static const char* function = "boost::math::skewness(fisher_f_distribution<%1%> const&)";
+   constexpr auto function = "boost::math::skewness(fisher_f_distribution<%1%> const&)";
    BOOST_MATH_STD_USING // ADL of std names
    // See http://mathworld.wolfram.com/F-Distribution.html
    RealType df1 = dist.degrees_of_freedom1();
@@ -346,18 +347,18 @@ inline RealType skewness(const fisher_f_distribution& dist)
 }

 template <class RealType, class Policy>
-RealType kurtosis_excess(const fisher_f_distribution<RealType, Policy>& dist);
+BOOST_MATH_GPU_ENABLED RealType kurtosis_excess(const fisher_f_distribution<RealType, Policy>& dist);

 template <class RealType, class Policy>
-inline RealType kurtosis(const fisher_f_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const fisher_f_distribution<RealType, Policy>& dist)
 {
    return 3 + kurtosis_excess(dist);
 }

 template <class RealType, class Policy>
-inline RealType kurtosis_excess(const fisher_f_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const fisher_f_distribution<RealType, Policy>& dist)
 {
-   static const char* function = "boost::math::kurtosis_excess(fisher_f_distribution<%1%> const&)";
+   constexpr auto function = "boost::math::kurtosis_excess(fisher_f_distribution<%1%> const&)";
    // See http://mathworld.wolfram.com/F-Distribution.html
    RealType df1 = dist.degrees_of_freedom1();
    RealType df2 = dist.degrees_of_freedom2();
diff --git a/include/boost/math/distributions/fwd.hpp b/include/boost/math/distributions/fwd.hpp
index a3c1a41df5..ccb3c0cd1b 100644
--- a/include/boost/math/distributions/fwd.hpp
+++ b/include/boost/math/distributions/fwd.hpp
@@ -66,6 +66,18 @@ class inverse_gaussian_distribution;
 template <class RealType, class Policy>
 class kolmogorov_smirnov_distribution;

+template <class RealType, class Policy>
+class landau_distribution;
+
+template <class RealType, class Policy>
+class mapairy_distribution;
+
+template <class RealType, class Policy>
+class holtsmark_distribution;
+
+template <class RealType, class Policy>
+class saspoint5_distribution;
+
 template <class RealType, class Policy>
 class laplace_distribution;

@@ -136,6 +148,10 @@ class weibull_distribution;
    typedef boost::math::inverse_chi_squared_distribution<Type, Policy> inverse_chi_squared;\
    typedef boost::math::inverse_gaussian_distribution<Type, Policy> inverse_gaussian;\
    typedef boost::math::inverse_gamma_distribution<Type, Policy> inverse_gamma;\
+   typedef boost::math::landau_distribution<Type, Policy> landau;\
+   typedef boost::math::mapairy_distribution<Type, Policy> mapairy;\
+   typedef boost::math::holtsmark_distribution<Type, Policy> holtsmark;\
+   typedef boost::math::saspoint5_distribution<Type, Policy> saspoint5;\
    typedef boost::math::laplace_distribution<Type, Policy> laplace;\
    typedef boost::math::logistic_distribution<Type, Policy> logistic;\
    typedef boost::math::lognormal_distribution<Type, Policy> lognormal;\
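For reference, a minimal sketch of the fisher_f_distribution functions annotated above (not part of the patch; the degrees of freedom are chosen arbitrarily), showing that quantile() and cdf() invert each other:

#include <boost/math/distributions/fisher_f.hpp>
#include <iostream>

int main()
{
    boost::math::fisher_f_distribution<double> dist(5.0, 10.0); // df1, df2

    double x = boost::math::quantile(dist, 0.95); // upper 5% critical value
    std::cout << x << '\n';
    std::cout << boost::math::cdf(dist, x) << '\n'; // recovers ~0.95
}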
diff --git a/include/boost/math/distributions/gamma.hpp b/include/boost/math/distributions/gamma.hpp
index 28b7c55b0b..5176f906d8 100644
--- a/include/boost/math/distributions/gamma.hpp
+++ b/include/boost/math/distributions/gamma.hpp
@@ -1,4 +1,5 @@
 // Copyright John Maddock 2006.
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -10,22 +11,22 @@
 // http://mathworld.wolfram.com/GammaDistribution.html
 // http://en.wikipedia.org/wiki/Gamma_distribution

+#include
+#include
+#include
 #include
 #include
 #include
 #include
 #include

-#include
-#include
-
 namespace boost{ namespace math
 {
 namespace detail
 {

 template <class RealType, class Policy>
-inline bool check_gamma_shape(
+BOOST_MATH_GPU_ENABLED inline bool check_gamma_shape(
       const char* function,
       RealType shape,
       RealType* result, const Policy& pol)
@@ -41,7 +42,7 @@ inline bool check_gamma_shape(
 }

 template <class RealType, class Policy>
-inline bool check_gamma_x(
+BOOST_MATH_GPU_ENABLED inline bool check_gamma_x(
       const char* function,
       RealType const& x,
       RealType* result, const Policy& pol)
@@ -57,7 +58,7 @@ inline bool check_gamma_x(
 }

 template <class RealType, class Policy>
-inline bool check_gamma(
+BOOST_MATH_GPU_ENABLED inline bool check_gamma(
       const char* function,
       RealType scale,
       RealType shape,
@@ -75,19 +76,19 @@ class gamma_distribution
    using value_type = RealType;
    using policy_type = Policy;

-   explicit gamma_distribution(RealType l_shape, RealType l_scale = 1)
+   BOOST_MATH_GPU_ENABLED explicit gamma_distribution(RealType l_shape, RealType l_scale = 1)
       : m_shape(l_shape), m_scale(l_scale)
    {
       RealType result;
       detail::check_gamma("boost::math::gamma_distribution<%1%>::gamma_distribution", l_scale, l_shape, &result, Policy());
    }

-   RealType shape()const
+   BOOST_MATH_GPU_ENABLED RealType shape()const
    {
       return m_shape;
    }

-   RealType scale()const
+   BOOST_MATH_GPU_ENABLED RealType scale()const
    {
       return m_scale;
    }
@@ -109,27 +110,27 @@ gamma_distribution(RealType,RealType)->gamma_distribution<typename boost::math::tools::promote_args<RealType>::type>;

-inline std::pair<RealType, RealType> range(const gamma_distribution<RealType, Policy>& /* dist */)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> range(const gamma_distribution<RealType, Policy>& /* dist */)
 { // Range of permissible values for random variable x.
    using boost::math::tools::max_value;
-   return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
 }

 template <class RealType, class Policy>
-inline std::pair<RealType, RealType> support(const gamma_distribution<RealType, Policy>& /* dist */)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> support(const gamma_distribution<RealType, Policy>& /* dist */)
 { // Range of supported values for random variable x.
    // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
    using boost::math::tools::max_value;
    using boost::math::tools::min_value;
-   return std::pair<RealType, RealType>(min_value<RealType>(), max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(min_value<RealType>(), max_value<RealType>());
 }

 template <class RealType, class Policy>
-inline RealType pdf(const gamma_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const gamma_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::pdf(const gamma_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::pdf(const gamma_distribution<%1%>&, %1%)";

    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -149,17 +150,17 @@ inline RealType pdf(const gamma_distribution& dist, const Real
 } // pdf

 template <class RealType, class Policy>
-inline RealType logpdf(const gamma_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType logpdf(const gamma_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions
    using boost::math::lgamma;

-   static const char* function = "boost::math::logpdf(const gamma_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::logpdf(const gamma_distribution<%1%>&, %1%)";

    RealType k = dist.shape();
    RealType theta = dist.scale();

-   RealType result = -std::numeric_limits<RealType>::infinity();
+   RealType result = -boost::math::numeric_limits<RealType>::infinity();
    if(false == detail::check_gamma(function, theta, k, &result, Policy()))
       return result;
    if(false == detail::check_gamma_x(function, x, &result, Policy()))
@@ -167,7 +168,7 @@ inline RealType logpdf(const gamma_distribution& dist, const R

    if(x == 0)
    {
-      return std::numeric_limits<RealType>::quiet_NaN();
+      return boost::math::numeric_limits<RealType>::quiet_NaN();
    }

    result = -k*log(theta) + (k-1)*log(x) - lgamma(k) - (x/theta);
@@ -176,11 +177,11 @@ inline RealType logpdf(const gamma_distribution& dist, const R
 } // logpdf

 template <class RealType, class Policy>
-inline RealType cdf(const gamma_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const gamma_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::cdf(const gamma_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(const gamma_distribution<%1%>&, %1%)";

    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -196,11 +197,11 @@ inline RealType cdf(const gamma_distribution& dist, const Real
 } // cdf

 template <class RealType, class Policy>
-inline RealType quantile(const gamma_distribution<RealType, Policy>& dist, const RealType& p)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const gamma_distribution<RealType, Policy>& dist, const RealType& p)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)";

    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -220,11 +221,11 @@ inline RealType quantile(const gamma_distribution& dist, const
 }

 template <class RealType, class Policy>
-inline RealType cdf(const complemented2_type<gamma_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<gamma_distribution<RealType, Policy>, RealType>& c)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)";

    RealType shape = c.dist.shape();
    RealType scale = c.dist.scale();
@@ -241,11 +242,11 @@ inline RealType cdf(const complemented2_type
 }

 template <class RealType, class Policy>
-inline RealType quantile(const complemented2_type<gamma_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<gamma_distribution<RealType, Policy>, RealType>& c)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)";

    RealType shape = c.dist.shape();
    RealType scale = c.dist.scale();
@@ -266,11 +267,11 @@ inline RealType quantile(const complemented2_type
 }

 template <class RealType, class Policy>
-inline RealType mean(const gamma_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mean(const gamma_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::mean(const gamma_distribution<%1%>&)";
+   constexpr auto function = "boost::math::mean(const gamma_distribution<%1%>&)";

    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -284,11 +285,11 @@ inline RealType mean(const gamma_distribution& dist)
 }

 template <class RealType, class Policy>
-inline RealType variance(const gamma_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType variance(const gamma_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::variance(const gamma_distribution<%1%>&)";
+   constexpr auto function = "boost::math::variance(const gamma_distribution<%1%>&)";

    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -302,11 +303,11 @@ inline RealType variance(const gamma_distribution& dist)
 }

 template <class RealType, class Policy>
-inline RealType mode(const gamma_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mode(const gamma_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::mode(const gamma_distribution<%1%>&)";
+   constexpr auto function = "boost::math::mode(const gamma_distribution<%1%>&)";

    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -331,11 +332,11 @@ inline RealType mode(const gamma_distribution& dist)
 //}

 template <class RealType, class Policy>
-inline RealType skewness(const gamma_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const gamma_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::skewness(const gamma_distribution<%1%>&)";
+   constexpr auto function = "boost::math::skewness(const gamma_distribution<%1%>&)";

    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -349,11 +350,11 @@ inline RealType skewness(const gamma_distribution& dist)
 }

 template <class RealType, class Policy>
-inline RealType kurtosis_excess(const gamma_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const gamma_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::kurtosis_excess(const gamma_distribution<%1%>&)";
+   constexpr auto function = "boost::math::kurtosis_excess(const gamma_distribution<%1%>&)";

    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -367,18 +368,19 @@ inline RealType kurtosis_excess(const gamma_distribution& dist
 }

 template <class RealType, class Policy>
-inline RealType kurtosis(const gamma_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const gamma_distribution<RealType, Policy>& dist)
 {
    return kurtosis_excess(dist) + 3;
 }

 template <class RealType, class Policy>
-inline RealType entropy(const gamma_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType entropy(const gamma_distribution<RealType, Policy>& dist)
 {
+   BOOST_MATH_STD_USING
+
    RealType k = dist.shape();
    RealType theta = dist.scale();
-   using std::log;
-   return k + log(theta) + lgamma(k) + (1-k)*digamma(k);
+   return k + log(theta) + boost::math::lgamma(k) + (1-k)*digamma(k);
 }

 } // namespace math
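The entropy() hunk above switches to boost::math::lgamma; a small sketch (not part of the patch; k and theta are arbitrary) checking the accessor against the closed form k + ln(theta) + ln Gamma(k) + (1 - k) * psi(k):

#include <boost/math/distributions/gamma.hpp>
#include <boost/math/special_functions/digamma.hpp>
#include <cmath>
#include <iostream>

int main()
{
    const double k = 2.0, theta = 3.0; // shape, scale
    boost::math::gamma_distribution<double> dist(k, theta);

    std::cout << boost::math::entropy(dist) << '\n';
    std::cout << k + std::log(theta) + std::lgamma(k)
                 + (1 - k) * boost::math::digamma(k) << '\n'; // same value
}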
diff --git a/include/boost/math/distributions/geometric.hpp b/include/boost/math/distributions/geometric.hpp
index 7c511ef2db..0a7b383c24 100644
--- a/include/boost/math/distributions/geometric.hpp
+++ b/include/boost/math/distributions/geometric.hpp
@@ -36,6 +36,9 @@
 #ifndef BOOST_MATH_SPECIAL_GEOMETRIC_HPP
 #define BOOST_MATH_SPECIAL_GEOMETRIC_HPP

+#include
+#include
+#include
 #include
 #include // for ibeta(a, b, x) == Ix(a, b).
 #include // complement.
@@ -45,10 +48,6 @@
 #include
 #include

-#include // using std::numeric_limits;
-#include
-#include
-
 #if defined (BOOST_MSVC)
 #  pragma warning(push)
 // This believed not now necessary, so commented out.
@@ -64,7 +63,7 @@ namespace boost
      // Common error checking routines for geometric distribution function:
      template <class RealType, class Policy>
-      inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& pol)
      {
        if( !(boost::math::isfinite)(p) || (p < 0) || (p > 1) )
        {
@@ -77,13 +76,13 @@ namespace boost
      }

      template <class RealType, class Policy>
-      inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& pol)
      {
        return check_success_fraction(function, p, result, pol);
      }

      template <class RealType, class Policy>
-      inline bool check_dist_and_k(const char* function, const RealType& p, RealType k, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_dist_and_k(const char* function, const RealType& p, RealType k, RealType* result, const Policy& pol)
      {
        if(check_dist(function, p, result, pol) == false)
        {
@@ -100,7 +99,7 @@ namespace boost
      } // Check_dist_and_k

      template <class RealType, class Policy>
-      inline bool check_dist_and_prob(const char* function, RealType p, RealType prob, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_dist_and_prob(const char* function, RealType p, RealType prob, RealType* result, const Policy& pol)
      {
        if((check_dist(function, p, result, pol) && detail::check_probability(function, prob, result, pol)) == false)
        {
@@ -117,7 +116,7 @@ namespace boost
      typedef RealType value_type;
      typedef Policy policy_type;

-      geometric_distribution(RealType p) : m_p(p)
+      BOOST_MATH_GPU_ENABLED geometric_distribution(RealType p) : m_p(p)
      { // Constructor stores success_fraction p.
        RealType result;
        geometric_detail::check_dist(
@@ -127,22 +126,22 @@ namespace boost
      } // geometric_distribution constructor.

      // Private data getter class member functions.
-      RealType success_fraction() const
+      BOOST_MATH_GPU_ENABLED RealType success_fraction() const
      { // Probability of success as fraction in range 0 to 1.
        return m_p;
      }
-      RealType successes() const
+      BOOST_MATH_GPU_ENABLED RealType successes() const
      { // Total number of successes r = 1 (for compatibility with negative binomial?).
        return 1;
      }

      // Parameter estimation.
      // (These are copies of negative_binomial distribution with successes = 1).
-      static RealType find_lower_bound_on_p(
+      BOOST_MATH_GPU_ENABLED static RealType find_lower_bound_on_p(
        RealType trials,
        RealType alpha) // alpha 0.05 equivalent to 95% for one-sided test.
      {
-        static const char* function = "boost::math::geometric<%1%>::find_lower_bound_on_p";
+        constexpr auto function = "boost::math::geometric<%1%>::find_lower_bound_on_p";
        RealType result = 0;  // of error checks.
        RealType successes = 1;
        RealType failures = trials - successes;
@@ -163,11 +162,11 @@ namespace boost
        return ibeta_inv(successes, failures + 1, alpha, static_cast<RealType*>(nullptr), Policy());
      } // find_lower_bound_on_p

-      static RealType find_upper_bound_on_p(
+      BOOST_MATH_GPU_ENABLED static RealType find_upper_bound_on_p(
        RealType trials,
        RealType alpha) // alpha 0.05 equivalent to 95% for one-sided test.
      {
-        static const char* function = "boost::math::geometric<%1%>::find_upper_bound_on_p";
+        constexpr auto function = "boost::math::geometric<%1%>::find_upper_bound_on_p";
        RealType result = 0;  // of error checks.
        RealType successes = 1;
        RealType failures = trials - successes;
@@ -195,12 +194,12 @@ namespace boost

      // Estimate number of trials :
      // "How many trials do I need to be P% sure of seeing k or fewer failures?"
-      static RealType find_minimum_number_of_trials(
+      BOOST_MATH_GPU_ENABLED static RealType find_minimum_number_of_trials(
        RealType k,     // number of failures (k >= 0).
        RealType p,     // success fraction 0 <= p <= 1.
        RealType alpha) // risk level threshold 0 <= alpha <= 1.
      {
-        static const char* function = "boost::math::geometric<%1%>::find_minimum_number_of_trials";
+        constexpr auto function = "boost::math::geometric<%1%>::find_minimum_number_of_trials";
        // Error checks:
        RealType result = 0;
        if(false == geometric_detail::check_dist_and_k(
@@ -213,12 +212,12 @@ namespace boost
        return result + k;
      } // RealType find_number_of_failures

-      static RealType find_maximum_number_of_trials(
+      BOOST_MATH_GPU_ENABLED static RealType find_maximum_number_of_trials(
        RealType k,     // number of failures (k >= 0).
        RealType p,     // success fraction 0 <= p <= 1.
        RealType alpha) // risk level threshold 0 <= alpha <= 1.
      {
-        static const char* function = "boost::math::geometric<%1%>::find_maximum_number_of_trials";
+        constexpr auto function = "boost::math::geometric<%1%>::find_maximum_number_of_trials";
        // Error checks:
        RealType result = 0;
        if(false == geometric_detail::check_dist_and_k(
@@ -244,22 +243,22 @@ namespace boost
 #endif

      template <class RealType, class Policy>
-      inline const std::pair<RealType, RealType> range(const geometric_distribution<RealType, Policy>& /* dist */)
+      BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const geometric_distribution<RealType, Policy>& /* dist */)
      { // Range of permissible values for random variable k.
         using boost::math::tools::max_value;
-        return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // max_integer?
+        return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // max_integer?
      }

      template <class RealType, class Policy>
-      inline const std::pair<RealType, RealType> support(const geometric_distribution<RealType, Policy>& /* dist */)
+      BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const geometric_distribution<RealType, Policy>& /* dist */)
      { // Range of supported values for random variable k.
        // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
        using boost::math::tools::max_value;
-        return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // max_integer?
+        return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // max_integer?
      }

      template <class RealType, class Policy>
-      inline RealType mean(const geometric_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType mean(const geometric_distribution<RealType, Policy>& dist)
      { // Mean of geometric distribution = (1-p)/p.
        return (1 - dist.success_fraction() ) / dist.success_fraction();
      } // mean
@@ -267,21 +266,21 @@ namespace boost
      // median implemented via quantile(half) in derived accessors.

      template <class RealType, class Policy>
-      inline RealType mode(const geometric_distribution<RealType, Policy>&)
+      BOOST_MATH_GPU_ENABLED inline RealType mode(const geometric_distribution<RealType, Policy>&)
      { // Mode of geometric distribution = zero.
        BOOST_MATH_STD_USING // ADL of std functions.
        return 0;
      } // mode

      template <class RealType, class Policy>
-      inline RealType variance(const geometric_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType variance(const geometric_distribution<RealType, Policy>& dist)
      { // Variance of Binomial distribution = (1-p) / p^2.
        return (1 - dist.success_fraction()) / (dist.success_fraction() * dist.success_fraction());
      } // variance

      template <class RealType, class Policy>
-      inline RealType skewness(const geometric_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType skewness(const geometric_distribution<RealType, Policy>& dist)
      { // skewness of geometric distribution = 2-p / (sqrt(r(1-p))
        BOOST_MATH_STD_USING // ADL of std functions.
        RealType p = dist.success_fraction();
@@ -289,7 +288,7 @@ namespace boost
      } // skewness

      template <class RealType, class Policy>
-      inline RealType kurtosis(const geometric_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const geometric_distribution<RealType, Policy>& dist)
      { // kurtosis of geometric distribution
        // http://en.wikipedia.org/wiki/geometric is kurtosis_excess so add 3
        RealType p = dist.success_fraction();
@@ -297,7 +296,7 @@ namespace boost
      } // kurtosis

      template <class RealType, class Policy>
-      inline RealType kurtosis_excess(const geometric_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const geometric_distribution<RealType, Policy>& dist)
      { // kurtosis excess of geometric distribution
        // http://mathworld.wolfram.com/Kurtosis.html table of kurtosis_excess
        RealType p = dist.success_fraction();
@@ -312,11 +311,11 @@ namespace boost
      // chf of geometric distribution provided by derived accessors.

      template <class RealType, class Policy>
-      inline RealType pdf(const geometric_distribution<RealType, Policy>& dist, const RealType& k)
+      BOOST_MATH_GPU_ENABLED inline RealType pdf(const geometric_distribution<RealType, Policy>& dist, const RealType& k)
      { // Probability Density/Mass Function.
        BOOST_FPU_EXCEPTION_GUARD
        BOOST_MATH_STD_USING  // For ADL of math functions.
-        static const char* function = "boost::math::pdf(const geometric_distribution<%1%>&, %1%)";
+        constexpr auto function = "boost::math::pdf(const geometric_distribution<%1%>&, %1%)";

        RealType p = dist.success_fraction();
        RealType result = 0;
@@ -350,9 +349,9 @@ namespace boost
      } // geometric_pdf

      template <class RealType, class Policy>
-      inline RealType cdf(const geometric_distribution<RealType, Policy>& dist, const RealType& k)
+      BOOST_MATH_GPU_ENABLED inline RealType cdf(const geometric_distribution<RealType, Policy>& dist, const RealType& k)
      { // Cumulative Distribution Function of geometric.
-        static const char* function = "boost::math::cdf(const geometric_distribution<%1%>&, %1%)";
+        constexpr auto function = "boost::math::cdf(const geometric_distribution<%1%>&, %1%)";

        // k argument may be integral, signed, or unsigned, or floating point.
        // If necessary, it has already been promoted from an integral type.
@@ -381,12 +380,12 @@ namespace boost
      } // cdf Cumulative Distribution Function geometric.

      template <class RealType, class Policy>
-      inline RealType logcdf(const geometric_distribution<RealType, Policy>& dist, const RealType& k)
+      BOOST_MATH_GPU_ENABLED inline RealType logcdf(const geometric_distribution<RealType, Policy>& dist, const RealType& k)
      { // Cumulative Distribution Function of geometric.
-        using std::pow;
-        using std::log;
-        using std::exp;
-        static const char* function = "boost::math::logcdf(const geometric_distribution<%1%>&, %1%)";
+        BOOST_MATH_STD_USING
+        constexpr auto function = "boost::math::logcdf(const geometric_distribution<%1%>&, %1%)";

        // k argument may be integral, signed, or unsigned, or floating point.
        // If necessary, it has already been promoted from an integral type.
@@ -399,7 +396,7 @@ namespace boost
            k, &result, Policy()))
        {
-          return -std::numeric_limits<RealType>::infinity();
+          return -boost::math::numeric_limits<RealType>::infinity();
        }
        if(k == 0)
        {
@@ -413,10 +410,10 @@ namespace boost
      } // logcdf Cumulative Distribution Function geometric.

      template <class RealType, class Policy>
-      inline RealType cdf(const complemented2_type<geometric_distribution<RealType, Policy>, RealType>& c)
+      BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<geometric_distribution<RealType, Policy>, RealType>& c)
      { // Complemented Cumulative Distribution Function geometric.
        BOOST_MATH_STD_USING
-        static const char* function = "boost::math::cdf(const geometric_distribution<%1%>&, %1%)";
+        constexpr auto function = "boost::math::cdf(const geometric_distribution<%1%>&, %1%)";
        // k argument may be integral, signed, or unsigned, or floating point.
        // If necessary, it has already been promoted from an integral type.
        RealType const& k = c.param;
@@ -438,10 +435,10 @@ namespace boost
      } // cdf Complemented Cumulative Distribution Function geometric.

      template <class RealType, class Policy>
-      inline RealType logcdf(const complemented2_type<geometric_distribution<RealType, Policy>, RealType>& c)
+      BOOST_MATH_GPU_ENABLED inline RealType logcdf(const complemented2_type<geometric_distribution<RealType, Policy>, RealType>& c)
      { // Complemented Cumulative Distribution Function geometric.
        BOOST_MATH_STD_USING
-        static const char* function = "boost::math::logcdf(const geometric_distribution<%1%>&, %1%)";
+        constexpr auto function = "boost::math::logcdf(const geometric_distribution<%1%>&, %1%)";
        // k argument may be integral, signed, or unsigned, or floating point.
        // If necessary, it has already been promoted from an integral type.
        RealType const& k = c.param;
@@ -455,21 +452,21 @@ namespace boost
            k, &result, Policy()))
        {
-          return -std::numeric_limits<RealType>::infinity();
+          return -boost::math::numeric_limits<RealType>::infinity();
        }

        return boost::math::log1p(-p, Policy()) * (k+1);
      } // logcdf Complemented Cumulative Distribution Function geometric.

      template <class RealType, class Policy>
-      inline RealType quantile(const geometric_distribution<RealType, Policy>& dist, const RealType& x)
+      BOOST_MATH_GPU_ENABLED inline RealType quantile(const geometric_distribution<RealType, Policy>& dist, const RealType& x)
      { // Quantile, percentile/100 or Percent Point geometric function.
        // Return the number of expected failures k for a given probability p.

        // Inverse cumulative Distribution Function or Quantile (percentile / 100) of geometric Probability.
        // k argument may be integral, signed, or unsigned, or floating point.
-        static const char* function = "boost::math::quantile(const geometric_distribution<%1%>&, %1%)";
+        constexpr auto function = "boost::math::quantile(const geometric_distribution<%1%>&, %1%)";
        BOOST_MATH_STD_USING // ADL of std functions.

        RealType success_fraction = dist.success_fraction();
@@ -513,11 +510,11 @@ namespace boost
      } // RealType quantile(const geometric_distribution dist, p)

      template <class RealType, class Policy>
-      inline RealType quantile(const complemented2_type<geometric_distribution<RealType, Policy>, RealType>& c)
+      BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<geometric_distribution<RealType, Policy>, RealType>& c)
      { // Quantile or Percent Point Binomial function.
        // Return the number of expected failures k for a given
        // complement of the probability Q = 1 - P.
-        static const char* function = "boost::math::quantile(const geometric_distribution<%1%>&, %1%)";
+        constexpr auto function = "boost::math::quantile(const geometric_distribution<%1%>&, %1%)";
        BOOST_MATH_STD_USING
        // Error checks:
        RealType x = c.param;
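For reference, a minimal sketch of the geometric_distribution API annotated above (not part of the patch; p, k and alpha are arbitrary), including one of the static planning helpers the hunks touch:

#include <boost/math/distributions/geometric.hpp>
#include <iostream>

int main()
{
    boost::math::geometric_distribution<double> dist(0.25); // success fraction p

    std::cout << boost::math::mean(dist) << '\n';     // (1 - p) / p = 3
    std::cout << boost::math::cdf(dist, 3.0) << '\n'; // 1 - (1 - p)^(k + 1)
    std::cout << boost::math::geometric_distribution<double>::find_minimum_number_of_trials(
        5.0, 0.25, 0.05) << '\n'; // trials needed to be 95% sure of seeing <= 5 failures
}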
static_cast(-9.15509628797205847643e-3), + static_cast(1.82052933284907579374e-2), + static_cast(-2.44157914076021125182e-4), + static_cast(8.40871885414177705035e-4), + static_cast(7.26592615882060553326e-5), + static_cast(-1.87768359214600016641e-6), + static_cast(1.65716961206268668529e-6), + static_cast(-1.73979640146948858436e-7), + static_cast(7.24351142163396584236e-9), + }; + BOOST_MATH_STATIC const RealType Q[9] = { + static_cast(1.), + static_cast(8.88099527896838765666e-1), + static_cast(6.53896948546877341992e-1), + static_cast(2.96296982585381844864e-1), + static_cast(1.14107585229341489833e-1), + static_cast(3.08914671331207488189e-2), + static_cast(7.03139384769200902107e-3), + static_cast(1.01201814277918577790e-3), + static_cast(1.12200113270398674535e-4), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 8) { + RealType t = x - 4; + + // Rational Approximation + // Maximum Relative Error: 6.5259e-17 + BOOST_MATH_STATIC const RealType P[11] = { + static_cast(1.36729417918039395222e-2), + static_cast(1.19749117683408419115e-2), + static_cast(6.26780921592414207398e-3), + static_cast(1.84846137440857608948e-3), + static_cast(3.39307829797262466829e-4), + static_cast(2.73606960463362090866e-5), + static_cast(-1.14419838471713498717e-7), + static_cast(1.64552336875610576993e-8), + static_cast(-7.95501797873739398143e-10), + static_cast(2.55422885338760255125e-11), + static_cast(-4.12196487201928768038e-13), + }; + BOOST_MATH_STATIC const RealType Q[9] = { + static_cast(1.), + static_cast(1.61334003864149486454e0), + static_cast(1.28348868912975898501e0), + static_cast(6.36594545291321210154e-1), + static_cast(2.11478937436277242988e-1), + static_cast(4.71550897200311391579e-2), + static_cast(6.64679677197059316835e-3), + static_cast(4.93706832858615742810e-4), + static_cast(9.26919465059204396228e-6), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 16) { + RealType t = x - 8; + + // Rational Approximation + // Maximum Relative Error: 3.5084e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(1.90649774685568282390e-3), + static_cast(7.43708409389806210196e-4), + static_cast(9.53777347766128955847e-5), + static_cast(3.79800193823252979170e-6), + static_cast(2.84836656088572745575e-8), + static_cast(-1.22715411241721187620e-10), + static_cast(8.56789906419220801109e-13), + static_cast(-4.17784858891714869163e-15), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(7.29383849235788831455e-1), + static_cast(2.16287201867831015266e-1), + static_cast(3.28789040872705709070e-2), + static_cast(2.64660789801664804789e-3), + static_cast(1.03662724048874906931e-4), + static_cast(1.47658125632566407978e-6), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 32) { + RealType t = x - 16; + + // Rational Approximation + // Maximum Relative Error: 1.4660e-19 + BOOST_MATH_STATIC const RealType P[9] = { + static_cast(3.07231582988207590928e-4), + static_cast(5.16108848485823513911e-5), + static_cast(3.05776014220862257678e-6), + static_cast(7.64787444325088143218e-8), + static_cast(7.40426355029090813961e-10), + static_cast(1.57451122102115077046e-12), + static_cast(-2.14505675750572782093e-15), + static_cast(5.11204601013038698192e-18), + static_cast(-9.00826023095223871551e-21), + }; + BOOST_MATH_STATIC const RealType Q[8] = { + static_cast(1.), + static_cast(3.28966789835486457746e-1), 
+    else {
+        RealType t = 1 / sqrt(x * x * x);
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.3004e-17
+        BOOST_MATH_STATIC const RealType P[4] = {
+            static_cast<RealType>(2.99206710301074508455e-1),
+            static_cast<RealType>(-8.62469397757826072306e-1),
+            static_cast<RealType>(1.74661995423629075890e-1),
+            static_cast<RealType>(8.75909164947413479137e-1),
+        };
+        BOOST_MATH_STATIC const RealType Q[3] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(-6.07405848111002255020e0),
+            static_cast<RealType>(1.34068401972703571636e1),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t / x;
+    }
+
+    return result;
+}
+
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_pdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 1) {
+        // Rational Approximation
+        // Maximum Relative Error: 4.5215e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.87352751452164445024482162286994868262e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.07622509000285763173795736744991173600e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.75004930885780661923539070646503039258e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.72358602484766333657370198137154157310e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.80082654994455046054228833198744292689e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.53887200727615005180492399966262970151e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07684195532179300820096260852073763880e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.39151986881253768780523679256708455051e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.31700721746247708002568205696938014069e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.52538425285394123789751606057231671946e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.13997198703138372752313576244312091598e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74788965317036115104204201740144738267e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.18994723428163008965406453309272880204e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.49208308902369087634036371223527932419e-11),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.07053963271862256947338846403373278592e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.30146528469038357598785392812229655811e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.22168809220570888957518451361426420755e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.30911708477464424748895247790513118077e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.32037605861909345291211474811347056388e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.37380742268959889784160508321242249326e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.17777859396994816599172003124202701362e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.69357597449425742856874347560067711953e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.22061268498705703002731594804187464212e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.03685918248668999775572498175163352453e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.42037705933347925911510259098903765388e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.13651251802353350402740200231061151003e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.15390928968620849348804301589542546367e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.96186359077726620124148756657971390386e-9),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x);
+    }
+    else if (x < 2) {
+        RealType t = x - 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.3996e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.02038159607840130388931544845552929992e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.85240836242909590376775233472494840074e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.92928437142375928121954427888812334305e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.56075992368354834619445578502239925632e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85410663490566091471288623735720924369e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.09160661432404033681463938555133581443e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60290555290385646856693819798655258098e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24420942563054709904053017769325945705e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.06370233020823161157791461691510091864e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.51562554221298564845071290898761434388e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.77361020844998296791409508640756247324e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.10768937536097342883548728871352580308e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.97810512763454658214572490850146305033e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.77430867682132459087084564268263825239e-11),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.30030169049261634787262795838348954434e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.45935676273909940847479638179887855033e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.14724239378269259016679286177700667008e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21580123796578745240828564510740594111e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.70287348745451818082884807214512422940e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46859813604124308580987785473592196488e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.49627445316021031361394030382456867983e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05157712406194406440213776605199788051e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.91541875103990251411297099611180353187e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.47960462287955806798879139599079388744e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.80126815763067695392857052825785263211e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04569118116204820761181992270024358122e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.63024381269503801668229632579505279520e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00967434338725770754103109040982001783e-8),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 4) {
+        RealType t = x - 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.6834e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[17] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.45396231261375200568114750897618690566e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.83107635287140466760500899510899613385e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.71690205829238281191309321676655995475e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.95995611963950467634398178757261552497e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.52444689050426648467863527289016233648e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.40423239472181137610649503303203209123e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72181273738390251101985797318639680476e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.11423032981781501087311583401963332916e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37255768388351332508195641748235373885e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.25140171472943043666747084376053803301e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.98925617316135247540832898350427842870e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.27532592227329144332335468302536835334e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.25846339430429852334026937219420930290e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.17852693845678292024334670662803641322e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60008761860786244203651832067697976835e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.85474213475378978699789357283744252832e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.05561259222780127064607109581719435800e-15),
+        };
+        BOOST_MATH_STATIC const RealType Q[17] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08902510590064634965634560548380735284e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.60127698266075086782895988567899172787e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.73299227011247478433171171063045855612e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.94019328695445269130845646745771017029e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21478511930928822349285105322914093227e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.42888485420705779382804725954524839381e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36839484685440714657854206969200824442e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.77082068469251728028552451884848161629e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.92625563541021144576900067220082880950e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88302521658522279293312672887766072876e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.37703703342287521257351386589629343948e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.32454189932655869016489443530062686013e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.81822848072558151338694737514507945151e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.40176559099032106726456059226930240477e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.55722115663529425797132143276461872035e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.18236697046568703899375072798708359035e-10),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 8) {
+        RealType t = x - 4;
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.6207e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[20] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36729417918039395222067998266923903488e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.05780369334958736210688756060527042344e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88449456199223796440901487003885388570e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20213624124017393492512893302682417041e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.95009975955570002297453163471062373746e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35668345583965001606910217518443864382e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.69006847702829685253055277085000792826e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.08366922884479491780654020783735539561e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.71834368599657597252633517017213868956e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.88269472722301903965736220481240654265e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37797139843759131750966129487745639531e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.72390971590654495025982276782257590019e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.68354503497961090303189233611418754374e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20749461042713568368181066233478264894e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.71167265100639100355339812752823628805e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37497033071709741762372104386727560387e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.08992504249040731356693038222581843266e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.03311745412603363076896897060158476094e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.89266184062176002518506060373755160893e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.22157263424086267338486564980223658130e-22),
+        };
+        BOOST_MATH_STATIC const RealType Q[19] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.24254809760594824834854946949546737102e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.66740386908805016172202899592418717176e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.17175023341071972435947261868288366592e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33939409711833786730168591434519989589e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.58859674176126567295417811572162232222e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.66346764121676348703738437519493817401e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.00687534341032230207422557716131339293e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.57352381181825892637055619366793541271e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.23955067096868711061473058513398543786e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28279376429637301814743591831507047825e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22380760186302431267562571014519501842e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.21421839279245792393425090284615681867e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.80151544531415207189620615654737831345e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.57177992740786529976179511261318869505e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.54223623314672019530719165336863142227e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26447311109866547647645308621478963788e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.76514314007336173875469200193103772775e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.63785420481380041892410849615596985103e-13),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 16) {
+        RealType t = x - 8;
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.8882e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[17] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.90649774685568282389553481307707005425e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.70151946710788532273869130544473159961e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76188245008605985768921328976193346788e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.94997481586873355765607596415761713534e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83556339450065349619118429405554762845e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.39766178753196196595432796889473826698e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.48835240264191055418415753552383932859e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.23205178959384483669515397903609703992e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.80665018951397281836428650435128239368e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27113208299726105096854812628329439191e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.75272882929773945317046764560516449105e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.73174017370926101455204470047842394787e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.55548825213165929101134655786361059720e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.79786015549170518239230891794588988732e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.73060731998834750292816218696923192789e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.62842837946576938669447109511449827857e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33878078951302606409419167741041897986e-26),
+        };
+        BOOST_MATH_STATIC const RealType Q[17] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.75629880937514507004822969528240262723e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43883005193126748135739157335919076027e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.26826935326347315479579835343751624245e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.52263130214924169696993839078084050641e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.34708681216662922818631865761136370252e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19079618273418070513605131981401070622e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.68812668867590621701228940772852924670e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.81323523265546812020317698573638573275e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46655191174052062382710487986225631851e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.79864553144116347379916608661549264281e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.81866770335021233700248077520029108331e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15408288688082935176022095799735538723e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29421875915133979067465908221270435168e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74564282803894180881025348633912184161e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.69782249847887916810010605635064672269e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.85875986197737611300062229945990879767e-18),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 32) {
+        RealType t = x - 16;
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.7988e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.07231582988207590928480356376941073734e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35574911514921623999866392865480652576e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.60219401814297026945664630716309317015e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.84927222345566515103807882976184811760e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96327408363203008584583124982694689234e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.86684048703029160378252571846517319101e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65469175974819997602752600929172261626e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.21842057555380199566706533446991680612e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.53555106309423641769303386628162522042e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.92686543698369260585325449306538016446e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.01838615452860702770059987567879856504e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.65492535746962514730615062374864701860e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.53395563720606494853374354984531107080e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.99957357701259203151690416786669242677e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46357124817620384236108395837490629563e-31),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.02259092175256156108200465685980768901e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63438230616954606028022008517920766366e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.63880061357592661176130881772975919418e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.81911305852397235014131637306820512975e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.09690724408294608306577482852270088377e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11275552068434583356476295833517496456e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.24681861037105338446379750828324925566e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16034379416965004687140768474445096709e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.23234481703249409689976894391287818596e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.93297387560911081670605071704642179017e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.50338428974314371000017727660753886621e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27897854868353937080739431205940604582e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.37798740524930029176790562876868493344e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.29920082153439260734550295626576101192e-22),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 64) {
+        RealType t = x - 32;
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.9688e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.25741312407933720816582583160953651639e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.04434146174674791036848306058526901384e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.68959516304795838166182070164492846877e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78859935261158263390023581309925613858e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.21854067989018450973827853792407054510e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20573856697340412957421887367218135538e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30843538021351383101589538141878424462e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.05991458689384045976214216819611949900e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.82253708752556965233757129893944884411e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.97645331663303764054986066027964294209e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.69353366461654917577775981574517182648e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.59050144462227302681332505386238071973e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.85165507189649330971049854127575847359e-31),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70711310565669331853925519429988855964e-34),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.72047006026700174884151916064158941262e-38),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.50985661940624198574968436548711898948e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.81705882167596649186405364717835589894e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.86537779048672498307196786015602357729e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.09555188550938733096253930959407749063e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41930442687159455334801545898059105733e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.09084284266255183930305946875294557622e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.58122754063904909636061457739518406730e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.91800215912676651584368499126132687326e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.66413330532845384974993669138524203429e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65919563020196445006309683624384862816e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.61596083414169579692212575079167989319e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16321386033703806802403099255708972015e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.90892719803158002834365234646982537288e-25),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        RealType t = 1 / sqrt(x * x * x);
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.0545e-39
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[8] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.99206710301074508454959544950786401357e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.75243304700875633383991614142545185173e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.69652690455351600373808930804785330828e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.36233941060408773406522171349397343951e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.28958973553713980463808202034854958375e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.55704950313835982743029388151551925282e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.28767698270323629107775935552991333781e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.80591252844738626580182351673066365090e1),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.57593243741246726197476469913307836496e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.99458751269722094414105565700775283458e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.91043982880665229427553316951582511317e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.99054490423334526438490907473548839751e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.36948968143124830402744607365089118030e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13781639547150826385071482161074041168e4),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t / x;
+    }
+
+    return result;
+}
+
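+// Dispatch layer: the Holtsmark density is symmetric about zero, so the sign
+// is dropped and the integral_constant precision tag selects between the
+// 53-bit and 113-bit rational approximations above.  A call such as
+// boost::math::pdf(holtsmark_distribution<double>(), 2.0), for instance,
+// should resolve to the <int, 53> overload.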
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_pdf_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>& tag) {
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    return holtsmark_pdf_plus_imp_prec(abs(x), tag);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_pdf_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>& tag) {
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    return holtsmark_pdf_plus_imp_prec(abs(x), tag);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_pdf_imp(const holtsmark_distribution<RealType, Policy>& dist, const RealType& x) {
+    //
+    // This calculates the pdf of the Holtsmark distribution.
+    //
+
+    BOOST_MATH_STD_USING // for ADL of std functions
+    constexpr auto function = "boost::math::pdf(holtsmark<%1%>&, %1%)";
+    RealType result = 0;
+    RealType location = dist.location();
+    RealType scale = dist.scale();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_x(function, x, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Holtsmark distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (i.e. 128-bit quad-floats)");
+
+    RealType u = (x - location) / scale;
+
+    result = holtsmark_pdf_imp_prec(u, tag_type()) / scale;
+
+    return result;
+}
+
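+// Complementary CDF on x >= 0.  The interval splits mirror the pdf branches;
+// the last branch integrates the x^(-5/2) density tail, so the survival
+// function falls off like 1.9947...e-1 * x^(-3/2) for large x.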
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_cdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 0.5) {
+        // Rational Approximation
+        // Maximum Relative Error: 1.3147e-17
+        BOOST_MATH_STATIC const RealType P[6] = {
+            static_cast<RealType>(5.0e-1),
+            static_cast<RealType>(-1.34752580674786639030e-1),
+            static_cast<RealType>(1.86318418252163378528e-2),
+            static_cast<RealType>(1.04499798132512381447e-2),
+            static_cast<RealType>(-1.60831910014592923855e-3),
+            static_cast<RealType>(1.38823662364438342844e-4),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(3.05200341554753776087e-1),
+            static_cast<RealType>(2.12663999430421346175e-1),
+            static_cast<RealType>(7.23836000984872591553e-2),
+            static_cast<RealType>(1.67941072412796299986e-2),
+            static_cast<RealType>(4.71213644318790580839e-3),
+            static_cast<RealType>(5.86825130959777535991e-4),
+        };
+
+        result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x);
+    }
+    else if (x < 1) {
+        RealType t = x - 0.5f;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.6265e-18
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(3.60595773518728397351e-1),
+            static_cast<RealType>(5.75238626843218819756e-1),
+            static_cast<RealType>(-3.31245319943021227117e-1),
+            static_cast<RealType>(1.48132966310216368831e-1),
+            static_cast<RealType>(-2.32875122617713403365e-2),
+            static_cast<RealType>(2.08038303148835575624e-3),
+            static_cast<RealType>(6.01511310581302829460e-6),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(2.32264360456739861886e0),
+            static_cast<RealType>(6.39715443864749851087e-1),
+            static_cast<RealType>(5.03940458163958921325e-1),
+            static_cast<RealType>(8.84780893031413729292e-2),
+            static_cast<RealType>(3.01497774031208621961e-2),
+            static_cast<RealType>(3.45886005612108195390e-3),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 2) {
+        RealType t = x - 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 7.4398e-20
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(2.43657975600729535515e-1),
+            static_cast<RealType>(-6.02286263626532324632e-2),
+            static_cast<RealType>(4.68361231392743283350e-2),
+            static_cast<RealType>(-1.13497179885838883972e-3),
+            static_cast<RealType>(1.20141595689136205012e-3),
+            static_cast<RealType>(3.02402304689333413256e-4),
+            static_cast<RealType>(-1.22652173865646814676e-6),
+            static_cast<RealType>(2.29521832683440044997e-6),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(5.82002427359748247121e-1),
+            static_cast<RealType>(3.96529686558825119743e-1),
+            static_cast<RealType>(1.49690294526117385174e-1),
+            static_cast<RealType>(5.15049953937764895435e-2),
+            static_cast<RealType>(1.30218216530450637564e-2),
+            static_cast<RealType>(2.53640337919037463659e-3),
+            static_cast<RealType>(3.79575042317720710311e-4),
+            static_cast<RealType>(2.94034997185982139717e-5),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 4) {
+        RealType t = x - 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.6148e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(1.05039829654829164883e-1),
+            static_cast<RealType>(1.66621813028423002562e-2),
+            static_cast<RealType>(2.93820049104275137099e-2),
+            static_cast<RealType>(3.36850260303189378587e-3),
+            static_cast<RealType>(2.27925819398326978014e-3),
+            static_cast<RealType>(1.66394162680543987783e-4),
+            static_cast<RealType>(4.51400415642703075050e-5),
+            static_cast<RealType>(2.12164734714059446913e-7),
+            static_cast<RealType>(1.69306881760242775488e-8),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(9.63461239051296108254e-1),
+            static_cast<RealType>(6.54183344973801096611e-1),
+            static_cast<RealType>(2.92007762594247903696e-1),
+            static_cast<RealType>(1.00918751132022401499e-1),
+            static_cast<RealType>(2.55899135910670703945e-2),
+            static_cast<RealType>(4.85740416919283630358e-3),
+            static_cast<RealType>(6.11435190489589619906e-4),
+            static_cast<RealType>(4.10953248859973756440e-5),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 8) {
+        RealType t = x - 4;
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.5866e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(3.05754562114095142887e-2),
+            static_cast<RealType>(3.25462617990002726083e-2),
+            static_cast<RealType>(1.78205524297204753048e-2),
+            static_cast<RealType>(5.61565369088816402420e-3),
+            static_cast<RealType>(1.05695297340067353106e-3),
+            static_cast<RealType>(9.93588579804511250576e-5),
+            static_cast<RealType>(2.94302107205379334662e-6),
+            static_cast<RealType>(1.09016076876928010898e-8),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(1.51164395622515150122e0),
+            static_cast<RealType>(1.09391911233213526071e0),
+            static_cast<RealType>(4.77950346062744800732e-1),
+            static_cast<RealType>(1.34082684956852773925e-1),
+            static_cast<RealType>(2.37572579895639589816e-2),
+            static_cast<RealType>(2.41806218388337284640e-3),
+            static_cast<RealType>(1.10378140456646280084e-4),
+            static_cast<RealType>(1.31559373832822136249e-6),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 16) {
+        RealType t = x - 8;
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.6575e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(9.47408470248235718880e-3),
+            static_cast<RealType>(4.70888722333356024081e-3),
+            static_cast<RealType>(8.66397831692913140221e-4),
+            static_cast<RealType>(7.11721056656424862090e-5),
+            static_cast<RealType>(2.56320582355149253994e-6),
+            static_cast<RealType>(3.37749186035552101702e-8),
+            static_cast<RealType>(8.32182844837952178153e-11),
+            static_cast<RealType>(-8.80541360484428526226e-14),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(6.98261117346347123707e-1),
+            static_cast<RealType>(1.97823959738695249267e-1),
+            static_cast<RealType>(2.89311735096848395080e-2),
+            static_cast<RealType>(2.30087055379997473849e-3),
+            static_cast<RealType>(9.60592522700377510007e-5),
+            static_cast<RealType>(1.84474415187428058231e-6),
+            static_cast<RealType>(1.14339998084523151203e-8),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 32) {
+        RealType t = x - 16;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.4164e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(3.19610991747326729867e-3),
+            static_cast<RealType>(5.11880074251341162590e-4),
+            static_cast<RealType>(2.80704092977662888563e-5),
+            static_cast<RealType>(6.31310155466346114729e-7),
+            static_cast<RealType>(5.29618446795457166842e-9),
+            static_cast<RealType>(9.20292337847562746519e-12),
+            static_cast<RealType>(-9.16761719448360345363e-15),
+            static_cast<RealType>(1.20433396121606479712e-17),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(2.56283944667056551858e-1),
+            static_cast<RealType>(2.56811818304462676948e-2),
+            static_cast<RealType>(1.26678062261253559927e-3),
+            static_cast<RealType>(3.17001344827541091252e-5),
+            static_cast<RealType>(3.68737201224811007437e-7),
+            static_cast<RealType>(1.47625352605312785910e-9),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 64) {
+        RealType t = x - 32;
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.2537e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(1.11172037056341397612e-3),
+            static_cast<RealType>(7.84545643188695076893e-5),
+            static_cast<RealType>(1.94862940242223222641e-6),
+            static_cast<RealType>(2.02704958737259525509e-8),
+            static_cast<RealType>(7.99772378955335076832e-11),
+            static_cast<RealType>(6.62544230949971310060e-14),
+            static_cast<RealType>(-3.18234118727325492149e-17),
+            static_cast<RealType>(2.03424457039308806437e-20),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(1.17861198759233241198e-1),
+            static_cast<RealType>(5.45962263583663240699e-3),
+            static_cast<RealType>(1.25274651876378267111e-4),
+            static_cast<RealType>(1.46857544539612002745e-6),
+            static_cast<RealType>(8.06441204620771968579e-9),
+            static_cast<RealType>(1.53682779460286464073e-11),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
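+    // x^3 overflows a double once x exceeds roughly 5.6e102; isnormal() spots
+    // this, and the fallback 1 / pow(sqrt(x), 3) computes x^(-3/2) without
+    // ever forming x^3.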
+    else {
+        RealType x_cube = x * x * x;
+        RealType t = static_cast<RealType>((boost::math::isnormal)(x_cube) ?
+            1 / sqrt(x_cube) : 1 / pow(sqrt(x), 3));
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.2897e-18
+        BOOST_MATH_STATIC const RealType P[4] = {
+            static_cast<RealType>(1.99471140200716338970e-1),
+            static_cast<RealType>(-6.90933799347184400422e-1),
+            static_cast<RealType>(4.30385245884336871950e-1),
+            static_cast<RealType>(3.52790131116013716885e-1),
+        };
+        BOOST_MATH_STATIC const RealType Q[3] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(-5.05959751628952574534e0),
+            static_cast<RealType>(8.04408113719341786819e0),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t;
+    }
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_cdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 0.5) {
+        // Rational Approximation
+        // Maximum Relative Error: 8.6635e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[12] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.0e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.48548242430636907136192799540229598637e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31541453581608245475805834922621529866e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.16579064508490250336159593502955219069e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.61598809551362112011328341554044706550e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.15119245273512554325709429759983470969e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02145196753734867721148927112307708045e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.90817224464950088663183617156145065001e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.69596202760983052482358128481956242532e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.50461337222845025623869078372182437091e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.62777995800923647521692709390412901586e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.63937253747323898965514197114021890186e-8),
+        };
+        BOOST_MATH_STATIC const RealType Q[12] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.76090180430550757765787254935343576341e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.07685236907561593034104428156351640194e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.27770556484351179553611274487979706736e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.99201460869149634331004096815257398515e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.70139000408086498153685620963430185837e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74682544708653069148470666809094453722e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57607114117485446922700160080966856243e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01069214414741946409122492979083487977e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.19996282759031441186748256811206136921e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60933466092746543579699079418115420013e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.92780739162611243933581782562159603862e-8),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x);
+    }
+    else if (x < 1) {
+        RealType t = x - 0.5;
+
+        // Rational Approximation
+        // Maximum Relative Error: 7.1235e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[12] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.60595773518728397925852903878144761766e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.46999595154527091473427440379143006753e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36962313432466566724352608642383560211e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.08387290167105915393692028475888846796e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.34156151832478939276011262838869269011e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.15970594471853166393830585755485842021e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.47022841547527682761332752928069503835e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.01955019188793323293925482112543902560e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.03069493388735516695142799880566783261e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.61367662035593735709965982000611000987e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.62800430658278408539398798888955969345e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.22300086876618079439960709120163780513e-8),
+        };
+        BOOST_MATH_STATIC const RealType Q[12] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.19740977756009966244249035150363085180e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39394884078938560974435920719979860046e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.97107758486905601309707335353809421910e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.36594079604957733960211938310153276332e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.85712904264673773213248691029253356702e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.87605080555629969548037543637523346061e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.26356599628579249350545909071984757938e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.79582114368994462181480978781382155103e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.00970375323007336435151032145023199020e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.06528824060244313614177859412028348352e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.13914667697998291289987140319652513139e-7),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 2) {
+        RealType t = x - 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.7659e-38
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.43657975600729535499895880792984203140e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.37090874182351552816526775008685285108e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.70793783828569126853147999925198280654e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.27295555253412802819195403503721983066e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.95916890788873842705597506423512639342e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.93625795791721417553345795882983866640e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.73237387099610415336810752053706403935e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.08118655139419640900853055479087235138e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74920069862339840183963818219485580710e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59015304773612605296533206093582658838e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.57256820413579442950151375512313072105e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.36240848333000575199740403759568680951e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.53890585580518120552628221662318725825e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.59245311730292556271235324976832000740e-10),
+        };
+        BOOST_MATH_STATIC const RealType Q[16] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.49800491033591771256676595185869442663e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.35827615015880595229881139361463765537e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41657125931991211322147702760511651998e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11782602975553967179829921562737846592e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.79410805176258968660086532862367842847e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22872839892405613311532856773434270554e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.23742349724658114137235071924317934569e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.80350762663884259375711329227548815674e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59501693037547119094683008622867020131e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.86068186167498269806443077840917848151e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.36940342373887783231154918541990667741e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.48911186460768204167014270878839691938e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.55051094964993052272146587430780404904e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.96312716130620326771080033656930839768e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45496951385730104726429368791951742738e-10),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 4) {
+        RealType t = x - 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.9091e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[17] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05039829654829170780787685299556996311e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.28948022754388615368533934448107849329e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.34139151583225691775740839359914493385e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13366377215523066657592295006960955345e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08045462837998791188853367062130086996e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.37648565386728404881404199616182064711e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14881702523183566448187346081007871684e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.73169022445183613027772635992366708052e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.86434609673325793686202636939208406356e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20865083025640755296377488921536984172e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.24550087063009488023243811976147518386e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78763689691843975658550702147832072016e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.53901449493513509116902285044951137217e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.64133451376958243174967226929215155126e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78021916681275593923355425070000331160e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.40116391931116431686557163556034777896e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.43891156389092896219387988411277617045e-15),
+        };
+        BOOST_MATH_STATIC const RealType Q[17] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30840297297890638941129884491157396207e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16059271948787750556465175239345182035e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.32333703228724830516425197803770832978e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.74722711058640395885914966387546141874e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57544653090705553268164186689966671940e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.65943099435809995745673109708218670077e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74158626875895095042054345316232575354e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.65978318533667031874695821156329945501e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07907034178758316909655424935083792468e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.16769901831316460137104511711073411646e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.72764558714782436683712413015421717627e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.42494185105694341746192094740530489313e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.47668761140694808076322373887857100882e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.06395948884595166425357861427667353718e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.04398743651684916010743222115099630062e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47852251142917253705233519146081069006e-10),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 8) {
+        RealType t = x - 4;
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.2255e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[20] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.05754562114095147060025732340404111260e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.29082907781747007723015304584383528212e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.15736486393536930535038719804968063752e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.47619683293773846642359668429058772885e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78777185267549567154655052281449528836e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.32280474402180284471490985942690221861e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.45430564625797085273267452885960070105e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.81643129239005795245093568930666448817e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57851748656417804512189330871167578685e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.04264676511381380381909064283066657450e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.84536783037391183433322642273799250079e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.27169201994160924743393109705813711010e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.42623512076200527099335832138825884729e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.98298083389459839517970895839114237996e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.71357920034737751299594537655948527288e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.98563999354325930973228648080876368296e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36248172644168880316722905969876969074e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.61071663749398045880261823483568866904e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.95933262363502031836408613043245164787e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.23007623135952181561484264810647517912e-21),
+        };
+        BOOST_MATH_STATIC const RealType Q[19] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.17760389606658547971193065026711073898e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.49565543987559264712057768584303008339e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.94822569926563661124528478579051628722e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14676844425183314970062115422221981422e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.35960757354198367535169328826167556715e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.04865288305482048252211468989095938024e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.51599632816346741950206107526304703067e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74065824586512487126287762563576185455e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.91819078437689679732215988465616022328e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.41675362609023565846569121735444698127e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17176431752708802291177040031150143262e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52367587943529121285938327286926798550e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59405168077254169099025950029539316125e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29448420654438993509041228047289503943e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.70091773726833073512661846603385666642e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.03909417984236210307694235586859612592e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.59098698207309055890188845050700901852e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.28146456709550379493162440280752828165e-14),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 16) {
+        RealType t = x - 8;
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.0174e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[17] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.47408470248235665279366712356669210597e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32149712567170349164953101675315481096e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.39806230477579028722350422669222849223e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.19665271447867857827798702851111114658e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.06773237553503696884546088197977608676e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41294370314265386485116359052296796357e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74848600628353761723457890991084017928e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52963427970210468265870547940464851481e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.33389244528769791436454176079341120973e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.86702000100897346192018772319301428852e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04192907586200235211623448416582655030e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70804269459077260463819507381406529187e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52665761996923502719902050367236108720e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.01866635015788942430563628065687465455e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.46658865059509532456423012727042498365e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.05806999626031246519161395419216393127e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.37645700309533972676063947195650607935e-26),
+        };
+        BOOST_MATH_STATIC const RealType Q[16] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59608758824065179587008165265773042260e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17347162462484266250945490058846704988e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.24511137251392519285309985668265122633e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.58497164094526279145784765183039854604e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.40787701096334660711443654292041286786e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.34615029717812271556414485397095293077e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.17712219229282308306346195001801048971e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.24578142893420308057222282020407949529e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.23429691331344898578916434987129070432e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41486460551571344910835151948209788541e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.23569151219279213399210115101532416912e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.21438860148387356361258237451828377118e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.46770060692933726695086996017149976796e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.58079984178724940266882149462170567147e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19997796316046571607659704855966005180e-17),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 32) {
+        RealType t = x - 16;
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.5109e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[16] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.19610991747326725339429696634365932643e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74646611039453235739153286141429338461e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.13331430865337412098234177873337036811e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.58947311195482646360642638791970923726e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.79226752074485124923797575635082779509e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.73081326043094090549807549513512116319e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05408849431691450650464797109033182773e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75716486666270246158606737499459843698e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.81075133718930099703621109350447306080e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.41318403854345256855350755520072932140e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.70220987388883118699419526374266655536e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.38711669183547686107032286389030018396e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.31300491679098874872172866011372530771e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.99223939265527640018203019269955457925e-25),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.18316957049006338447926554380706108087e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.47298013808154174645356607027685011183e-32),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.42561659771176310412113991024326129105e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83353398513931409985504410958429204317e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.07254121026393428163401481487563215753e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.36667170168890854756291846167398225330e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.54019749685699795075624204463938596069e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35321766966107368759516431698755077175e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.13350720091296144188972188966204719103e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.38107118390482863395863404555696613407e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.59267757423034664579822257229473088511e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.29549090773392058626428205171445962834e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.69922128600755513676564327500993739088e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31337037977667816904491472174578334375e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.28088047429043940293455906253037445768e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.01213369826105495256520034997664473667e-22),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 64) {
+        RealType t = x - 32;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.2707e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11172037056341396583040940446061501972e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.09383362521204903801686281772843962372e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71440982391172647693486692131238237524e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.01685075759372692173396811575536866699e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36574894913423830789864836789988898151e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.59644999935503505576091023207315968623e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.95573282292603122067959656607163690356e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.10361486103428098366627536344769789255e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.80946231978997457068033851007899208222e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.39341134002270945594553624959145830111e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.72307967968246649714945553177468010263e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41093409238620968003297675770440189200e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.70464969040825495565297719377221881609e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.25341184125872354328990441812668510029e-32),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.54663422572657744572284839697818435372e-36),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35632539169215377884393376342532721825e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.46975491055790597767445011183622230556e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.51806800870130779095309105834725930741e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.07403939022350326847926101278370197017e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66046114012817696416892197044749060854e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.16723371111678357128668916130767948114e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.22972796529973974439855811125888770710e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.91073180314665062004869985842402705599e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.43753004383633382914827301174981384446e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.77313206526206002175298314351042907499e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.32850553089285690900825039331456226080e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.85369976595753971532524294793778805089e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28948021485210224442871255909409155592e-25),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
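+    // Same overflow guard as the double-precision tail branch above: avoid
+    // forming x^3 directly once it would leave the normal range.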
+    else {
+        RealType x_cube = x * x * x;
+        RealType t = (boost::math::isnormal)(x_cube) ?
+            1 / sqrt(x_cube) : 1 / pow(sqrt(x), 3);
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.4677e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[7] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99471140200716338969973029967190934238e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.48481268366645066801385595379873318648e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.64087860141734943856373451877569284231e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.45555576045996041260191574503331698473e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43290677381328916734673040799990923091e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.63011127597770211743774689830589568544e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.61127812511057623691896118746981066174e0),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.90660291309478542795359451748753358123e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60631500002415936739518466837931659008e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.88655117367497147850617559832966816275e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48350179543067311398059386524702440002e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.18873206560757944356169500452181141647e3),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t;
+    }
+
+    return result;
+}
+
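+// Reflection layer: by symmetry F(-x) = 1 - F(x), so both tails reduce to
+// holtsmark_cdf_plus_imp_prec; `complement` returns the survival function
+// directly, avoiding cancellation for large x, and the trailing else only
+// fires for NaN input.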
+ // + + BOOST_MATH_STD_USING // for ADL of std functions + constexpr auto function = "boost::math::cdf(holtsmark<%1%>&, %1%)"; + RealType result = 0; + RealType location = dist.location(); + RealType scale = dist.scale(); + + if (false == detail::check_location(function, location, &result, Policy())) + { + return result; + } + if (false == detail::check_scale(function, scale, &result, Policy())) + { + return result; + } + if (false == detail::check_x(function, x, &result, Policy())) + { + return result; + } + + typedef typename tools::promote_args::type result_type; + typedef typename policies::precision::type precision_type; + typedef boost::math::integral_constant tag_type; + + static_assert(tag_type::value, "The Holtsmark distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats"); + + RealType u = (x - location) / scale; + + result = holtsmark_cdf_imp_prec(u, complement, tag_type()); + + return result; +} + +template +BOOST_MATH_GPU_ENABLED inline RealType holtsmark_quantile_upper_imp_prec(const RealType& p, const boost::math::integral_constant&) +{ + BOOST_MATH_STD_USING + RealType result; + + if (ilogb(p) >= -2) { + RealType t = -log2(ldexp(p, 1)); + + // Rational Approximation + // Maximum Relative Error: 5.8068e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(7.59789769759814986929e-1), + static_cast(1.27515008642985381862e0), + static_cast(4.38619247097275579086e-1), + static_cast(-1.25521537863031799276e-1), + static_cast(-2.58555599127223857177e-2), + static_cast(1.20249932437303932411e-2), + static_cast(-1.36753104188136881229e-3), + static_cast(6.57491277860092595148e-5), + }; + BOOST_MATH_STATIC const RealType Q[9] = { + static_cast(1.), + static_cast(2.48696501912062288766e0), + static_cast(2.06239370128871696850e0), + static_cast(5.67577904795053902651e-1), + static_cast(-2.89022828087034733385e-2), + static_cast(-2.17207943286085236479e-2), + static_cast(3.14098307020814954876e-4), + static_cast(3.51448381406676891012e-4), + static_cast(5.71995514606568751522e-5), + }; + + result = t * tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -3) { + RealType t = -log2(ldexp(p, 2)); + + // Rational Approximation + // Maximum Relative Error: 1.0339e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(3.84521387984759064238e-1), + static_cast(4.15763727809667641126e-1), + static_cast(-1.73610240124046440578e-2), + static_cast(-3.89915764128788049837e-2), + static_cast(1.07252911248451890192e-2), + static_cast(7.62613727089795367882e-4), + static_cast(-3.11382403581073580481e-4), + static_cast(3.93093062843177374871e-5), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(6.76193897442484823754e-1), + static_cast(3.70953499602257825764e-2), + static_cast(-2.84211795745477605398e-2), + static_cast(2.66146101014551209760e-3), + static_cast(1.85436727973937413751e-3), + static_cast(2.00318687649825430725e-4), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -4) { + RealType t = -log2(ldexp(p, 3)); + + // Rational Approximation + // Maximum Relative Error: 1.4431e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(4.46943301497773314460e-1), + static_cast(-1.07267614417424412546e-2), + static_cast(-7.21097021064631831756e-2), + static_cast(2.93948745441334193469e-2), + static_cast(-7.33259305010485915480e-4), + 
static_cast(-1.38660725579083612045e-3), + static_cast(2.95410432808739478857e-4), + static_cast(-2.88688017391292485867e-5), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(-2.72809429017073648893e-2), + static_cast(-7.85526213469762960803e-2), + static_cast(2.41360900478283465241e-2), + static_cast(3.44597797125179611095e-3), + static_cast(-8.65046428689780375806e-4), + static_cast(-1.04147382037315517658e-4), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -6) { + RealType t = -log2(ldexp(p, 4)); + + // Rational Approximation + // Maximum Relative Error: 4.8871e-17 + BOOST_MATH_STATIC const RealType P[10] = { + static_cast(4.25344469980677332786e-1), + static_cast(3.42055470008289997369e-2), + static_cast(9.33607217644370441642e-2), + static_cast(4.57057092587794346086e-2), + static_cast(1.16149976708336017542e-2), + static_cast(6.40479797962035786337e-3), + static_cast(1.58526153828271386329e-3), + static_cast(3.84032908993313260466e-4), + static_cast(6.98960839033991110525e-5), + static_cast(9.66690587477825432174e-6), + }; + BOOST_MATH_STATIC const RealType Q[10] = { + static_cast(1.), + static_cast(1.60044610004497775009e-1), + static_cast(2.41675490962065446592e-1), + static_cast(1.13752642382290596388e-1), + static_cast(4.05058759031434785584e-2), + static_cast(1.59432816225295660111e-2), + static_cast(4.79286678946992027479e-3), + static_cast(1.16048151070154814260e-3), + static_cast(2.01755520912887201472e-4), + static_cast(2.82884561026909054732e-5), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -8) { + RealType t = -log2(ldexp(p, 6)); + + // Rational Approximation + // Maximum Relative Error: 4.8173e-17 + BOOST_MATH_STATIC const RealType P[9] = { + static_cast(3.68520435599726877886e-1), + static_cast(8.26682725061327242371e-1), + static_cast(6.85235826889543887309e-1), + static_cast(3.28640408399661746210e-1), + static_cast(9.04801242897407528807e-2), + static_cast(1.57470088502958130451e-2), + static_cast(1.61541023176880542598e-3), + static_cast(9.78919203915954346945e-5), + static_cast(9.71371309261213597491e-8), + }; + BOOST_MATH_STATIC const RealType Q[8] = { + static_cast(1.), + static_cast(2.29132755303753682133e0), + static_cast(1.95530118226232968288e0), + static_cast(9.55029685883545321419e-1), + static_cast(2.68254036588585643328e-1), + static_cast(4.61398419640231283164e-2), + static_cast(4.66131710581568432246e-3), + static_cast(2.94491397241310968725e-4), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -16) { + RealType t = -log2(ldexp(p, 8)); + + // Rational Approximation + // Maximum Relative Error: 6.0376e-17 + BOOST_MATH_STATIC const RealType P[10] = { + static_cast(3.48432718168951419458e-1), + static_cast(2.99680703419193973028e-1), + static_cast(1.09531896991852433149e-1), + static_cast(2.28766133215975559897e-2), + static_cast(3.09836969941710802698e-3), + static_cast(2.89346186674853481383e-4), + static_cast(1.96344583080243707169e-5), + static_cast(9.48415601271652569275e-7), + static_cast(3.08821091232356755783e-8), + static_cast(5.58003465656339818416e-10), + }; + BOOST_MATH_STATIC const RealType Q[10] = { + static_cast(1.), + static_cast(8.73938978582311007855e-1), + static_cast(3.21771888210250878162e-1), + static_cast(6.70432401844821772827e-2), + 
static_cast(9.05369648218831664411e-3), + static_cast(8.50098390828726795296e-4), + static_cast(5.73568804840571459050e-5), + static_cast(2.78374120155590875053e-6), + static_cast(9.03427646135263412003e-8), + static_cast(1.63556457120944847882e-9), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -32) { + RealType t = -log2(ldexp(p, 16)); + + // Rational Approximation + // Maximum Relative Error: 2.2804e-17 + BOOST_MATH_STATIC const RealType P[10] = { + static_cast(3.41419813138786920868e-1), + static_cast(1.30219412019722274099e-1), + static_cast(2.36047671342109636195e-2), + static_cast(2.67913051721210953893e-3), + static_cast(2.10896260337301129968e-4), + static_cast(1.19804595761611765179e-5), + static_cast(4.91470756460287578143e-7), + static_cast(1.38299844947707591018e-8), + static_cast(2.25766283556816829070e-10), + static_cast(-8.46510608386806647654e-18), + }; + BOOST_MATH_STATIC const RealType Q[9] = { + static_cast(1.), + static_cast(3.81461950831351846380e-1), + static_cast(6.91390438866520696447e-2), + static_cast(7.84798596829449138229e-3), + static_cast(6.17735117400536913546e-4), + static_cast(3.50937328177439258136e-5), + static_cast(1.43958654321452532854e-6), + static_cast(4.05109749922716264456e-8), + static_cast(6.61306247924109415113e-10), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -64) { + RealType t = -log2(ldexp(p, 32)); + + // Rational Approximation + // Maximum Relative Error: 4.8545e-17 + BOOST_MATH_STATIC const RealType P[9] = { + static_cast(3.41392032051575965049e-1), + static_cast(1.53372256183388434238e-1), + static_cast(3.33822240038718319714e-2), + static_cast(4.66328786929735228532e-3), + static_cast(4.67981207864367711082e-4), + static_cast(3.48119463063280710691e-5), + static_cast(2.17755850282052679342e-6), + static_cast(7.40424342670289242177e-8), + static_cast(4.61294046336533026640e-9), + }; + BOOST_MATH_STATIC const RealType Q[9] = { + static_cast(1.), + static_cast(4.49255524669251621744e-1), + static_cast(9.77826688966262423974e-2), + static_cast(1.36596271675764346980e-2), + static_cast(1.37080296105355418281e-3), + static_cast(1.01970588303201339768e-4), + static_cast(6.37846903580539445994e-6), + static_cast(2.16883897125962281968e-7), + static_cast(1.35121503608967367232e-8), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else { + const BOOST_MATH_STATIC_LOCAL_VARIABLE RealType c = ldexp(cbrt(constants::pi()), 1); + + RealType p_square = p * p; + + if ((boost::math::isnormal)(p_square)) { + result = 1 / (cbrt(p_square) * c); + } + else if (p > 0) { + result = 1 / (cbrt(p) * cbrt(p) * c); + } + else { + result = boost::math::numeric_limits::infinity(); + } + } + + return result; +} + + +template +BOOST_MATH_GPU_ENABLED inline RealType holtsmark_quantile_upper_imp_prec(const RealType& p, const boost::math::integral_constant&) +{ + BOOST_MATH_STD_USING + RealType result; + + if (ilogb(p) >= -2) { + RealType u = -log2(ldexp(p, 1)); + + if (u < 0.5) { + // Rational Approximation + // Maximum Relative Error: 1.7987e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.59789769759815031687162026655576575384e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.23247138049619855169890925442523844619e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
5.35351935489348780511227763760731136136e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.17321534695821967609074567968260505604e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30930523792327030433989902919481147250e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.47676800034255152477549544991291837378e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.09952071024064609787697026812259269093e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.65479872964217159571026674930672527880e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.30204907832301876030269224513949605725e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.61038349134944320766567917361933431224e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.17242905696479357297850061918336600969e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.43640101589433162893041733511239841220e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.39406616773257816628641556843884616119e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.54871597065387376666252643921309051097e-7), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.06310038178166385607814371094968073940e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06144046990424238286303107360481469219e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17860081295611631017119482265353540470e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.26319639748358310901277622665331115333e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.25962127567362715217159291513550804588e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.65543974081934423010588955830131357921e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.80331848633772107482330422252085368575e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.97426948050874772305317056836660558275e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.10722999873793200671617106731723252507e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.68871255379198546500699434161302033826e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.70190278641952708999014435335172772138e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.11562497711461468804693130702653542297e-7), + }; + // LCOV_EXCL_STOP + result = u * tools::evaluate_polynomial(P, u) / (tools::evaluate_polynomial(Q, u) * cbrt(p * p)); + } + else { + RealType t = u - static_cast (0.5); + + // Rational Approximation + // Maximum Relative Error: 2.5554e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.63490994331899195346399558699533994243e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.68682839419340144322747963938810505658e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.63089084712442063245295709191126453412e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24910510426787025593146475670961782647e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.14005632199839351091767181535761567981e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.88144015238275997284082820907124267240e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.12015895125039876623372795832970536355e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.96386756665254981286292821446749025989e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.82855208595003635135641502084317667629e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.18007513930934295792217002090233670917e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.82563310387467580262182864644541746616e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.52830681121195099547078704713089681353e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
3.91383571211375811878311159248551586411e-8), + }; + BOOST_MATH_STATIC const RealType Q[12] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96820655322136936855997114940653763917e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30209571878469737819039455443404070107e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.61235660141139249931521613001554108034e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.31683133997030095798635713869616211197e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.20681979279848555447978496580849290723e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.08958899028812330281115719259773001136e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.02478613175545210977059079339657545008e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.68653479132148912896487809682760117627e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35166554499214836086438565154832646441e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.95409975934011596023165394669416595582e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.84312112139729518216217161835365265801e-7), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + } + else if (ilogb(p) >= -3) { + RealType u = -log2(ldexp(p, 2)); + + if (u < 0.5) { + // Rational Approximation + // Maximum Relative Error: 1.0297e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[12] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.84521387984759060262188972210005114936e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.70837834325236202821328032137877091515e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.53856963029219911450181095566096563059e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.97659091653089105048621336944687224192e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.77726241585387617566937892474685179582e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21657224955483589784473724186837316423e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76357400631206366078287330192525531850e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.45967265853745968166172649261385754061e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.08367654892620484522749804048317330020e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41224530727710207304898458924763411052e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.02908228738160003274584644834000176496e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.05702214080592377840761032481067834813e-7), + }; + BOOST_MATH_STATIC const RealType Q[12] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33954869248363301881659953529609341564e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.73738626674455393272550888585363920917e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.90708494363306682523722238824373341707e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.49559648492983033200126224112060119905e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.07561158260652000950392950266037061167e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.30349651195547682860585068738648645100e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.21766408404123861757376277367204136764e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.22181499366766592894880124261171657846e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.74488053046587079829684775540618210211e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.90504597668186854963746384968119788469e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45195198322028676384075318222338781298e-7), + }; 
+ // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, u) / (tools::evaluate_polynomial(Q, u) * cbrt(p * p)); + } + else { + RealType t = u - static_cast (0.5); + + // Rational Approximation + // Maximum Relative Error: 1.3688e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[12] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.34418795581931891732555950599385666106e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.13006013029934051875748102515422669897e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.27990072710518465265454549585803147529e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.82530244963278920355650323928131927272e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.05335741422175616606162502617378682462e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71242678756797136217651369710748524650e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.65147398836785709305701073315614307906e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.23912765853731378067295654886575185240e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.77861910171412622761254991979036167882e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.11971510714149983297022108523700437739e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23649928279010039670034778778065846828e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.99636080473697209793683863161785312159e-8), + }; + BOOST_MATH_STATIC const RealType Q[12] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.95056572065373808001002483348789719155e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.55702988004729812458415992666809422570e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.07586989542594910084052301521098115194e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96831670560124470215505714403486118412e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.86445076378084412691927796983792892534e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.75566285003039738258189045863064261980e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.18557444175572723760508226182075127685e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.66667716357950609103712975111660496416e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.70999480357934082364999779023268059131e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.14604868719110256415222454908306045416e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.32724040071094913191419223901752642417e-8), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + } + else if (ilogb(p) >= -4) { + RealType t = -log2(ldexp(p, 3)); + + // Rational Approximation + // Maximum Relative Error: 6.6020e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.46943301497773318715008398224877079279e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.85403413700924949902626248891615772650e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.02791895890363892816315784780533893399e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.89147412486638444082129846251261616763e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.93382251168424191872267997181870008850e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.68332196426082871660060467570049113632e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.88720436260994811649162949644253306037e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.34099304204778307050211441936900839075e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.42970601149275611131932131801993030928e-4), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.19329425598839605828710629592687495198e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.48826007216547106568423189194739111033e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.47132934846160946190230821709692067279e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.34123780321108493820637601375183345528e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.64549285026064221742294542922996905241e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72723306533295983872420985773212608299e-9), + }; + BOOST_MATH_STATIC const RealType Q[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.23756826160440280076231428938184359865e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.46557011055563840763437682311082689407e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18907861669025579159409035585375166964e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.09998981512549500250715800529896557509e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.09496663758959409482213456915225652712e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.37086325651334206453116588474211557676e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65325780110454655811120026458133145750e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.93435549562125602056160657604473721758e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.34967558308250784125219085040752451132e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.73883529653464036447550624641291181317e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.88600727347267778330635397957540267359e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.26681383000234695948685993798733295748e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19871610873353691152255428262732390602e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.42468017918888155246438948321084323623e-9), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -5) { + RealType u = -log2(ldexp(p, 4)); + + if (u < 0.5) { + // Rational Approximation + // Maximum Relative Error: 5.0596e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[12] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.25344469980677353573160570139298422046e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.41915371584999983192100443156935649063e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02829239548689190780023994008688591230e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29283473326959885625548350158197923999e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.01078477165670046284950196047161898687e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.02714892887893367912743194877742997622e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.43133417775367444366548711083157149060e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34782994090554432391320506638030058071e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.06742736859237185836735105245477248882e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.55982601406660341132288721616681417444e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.57770758189194396236862269776507019313e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.29311341249565125992213260043135188072e-8), + }; + BOOST_MATH_STATIC const RealType Q[12] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.54021943144355190773797361537886598583e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30965787836836308380896385568728211303e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
3.19314242976592846926644622802257778872e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.84123785238634690769817401191138848504e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.75779029464908805680899310810660326192e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15078294915445673781718097749944059134e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.84667183003626452412083824490324913477e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.59521438712225874821007396323337016693e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.90446539427779905568600432145715126083e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.21425779911599424040614866482614099753e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.00972806247654369646317764344373036462e-8), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, u) / (tools::evaluate_polynomial(Q, u) * cbrt(p * p)); + } + else { + RealType t = u - static_cast (0.5); + + // Rational Approximation + // Maximum Relative Error: 8.3743e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.08071367192424306005939751362206079160e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.94625900993512461462097316785202943274e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.55970241156822104458842450713854737857e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.07663066299810473476390199553510422731e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.89859986209620592557993828310690990189e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.04002735956724252558290154433164340078e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.28754717941144647796091692241880059406e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19307116062867039608045413276099792797e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.02377178609994923303160815309590928289e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.71739619655097982325716241977619135216e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.70229045058419872036870274360537396648e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.90495731447121207951661931979310025968e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23210708203609461650368387780135568863e-8), + }; + BOOST_MATH_STATIC const RealType Q[11] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.93402256203255215539822867473993726421e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.42452702043886045884356307934634512995e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.16981055684612802160174937997247813645e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39560623514414816165791968511612762553e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.26014275897567952035148355055139912545e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.42163967753843746501638925686714935099e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.63605648300801696460942201096159808446e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.55933967787268788177266789383155699064e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.41526208021076709058374666903111908743e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.08505866202670144225100385141263360218e-6), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + } + else if (ilogb(p) >= -6) { + RealType t = -log2(ldexp(p, 5)); + + // Rational Approximation + // Maximum Relative Error: 2.4734e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[16] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
3.92042979500197776619414802317216082414e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.94742044285563829335663810275331541585e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.14525306632578654372860377652983462776e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.88893010132758460781753381176593178775e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08491462791290535107958214106528611951e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.61374431854187722720094162894017991926e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11641062509116613779440753514902522337e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.12474548036763970495563846370119556004e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.48140831258790372410036499310440980121e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.26913338169355215445128368312197650848e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63109797282729701768942543985418804075e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.55296802973076575732233624155433324402e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72108609713971908723724065216410393928e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.93328436272999507339897246655916666269e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72119240610740992234979508242967886200e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17836139198065889244530078295061548097e-10), + }; + BOOST_MATH_STATIC const RealType Q[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78065342260594920160228973261455037923e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.08575070304822733863613657779515344137e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.81185785915044621118680763035984134530e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.87597191269586886460326897968559867853e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07903258768761230286548634868645339678e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.88395769450457864233486684232536503140e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.05678227243099671420442217017131559055e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.24803207742284923122212652186826674987e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06094715338829793088081672723947647238e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.96454433858093590192363331553516923090e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.94509901530299070041475386866323617753e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.49001710126540196485963921184736711193e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.58899179756014192338509671769986887613e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.06916561094749601736592488829778059190e-8), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -8) { + RealType t = -log2(ldexp(p, 6)); + + // Rational Approximation + // Maximum Relative Error: 1.1570e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.68520435599726860132888599110871216319e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.01076105507184082206031922185510102322e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39912455237662038937400667644545834191e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.51088991221663244634723139723207272560e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26465949648856746869050310379379898086e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.37079746226805258449355819952819997723e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
4.49372033421420312720741838903118544951e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.95729572745049276972587492142384353131e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.92794840197452838799536047152725573779e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96897979363475104635129765703613472468e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.44138843334474914059035559588791041371e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.78076328055619970057667292651627051391e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04182093251998194244585085400876144351e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04392999917657413659748817212746660436e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.76006125565969084470924344826977844710e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.21045181507045010640119572995692565368e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.61400097324698003962179537436043636306e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.88084230973635340409728710734906398080e-11), + }; + BOOST_MATH_STATIC const RealType Q[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.49319798750825059930589954921919984293e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.90218243410186000622818205955425584848e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.25384789213915993855434876209137054104e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.58563858782064482133038568901836564329e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.39112608961600614189971858070197609546e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29192895265168981204927382938872469754e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.66418375973954918346810939649929797237e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.01606040038159207768769492693779323748e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.75837675697421536953171865636865644576e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30258315910281295093103384193132807400e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.28333635097670841003561009290200071343e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.04871369296490431325621140782944603554e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.05077352164673794093561693258318905067e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.28508157403208548483052311164947568580e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22527248376737724147359908626095469985e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.75479484339716254784610505187249810386e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.39990051830081888581639577552526319577e-11), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -16) { + RealType t = -log2(ldexp(p, 8)); + + // Rational Approximation + // Maximum Relative Error: 1.1362e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[22] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.48432718168951398420402661878962745094e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.55946442453078865766668586202885528338e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.54912640113904816247923987542554486059e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.60852745978561293262851287627328856197e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.93256608166097432329211369307994852513e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.94707001299612588571704157159595918562e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40368387009950846525432054396214443833e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
2.62326983889228773089492130483459202197e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.97166112600628615762158757484340724056e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.95053681446806610424931810174198926457e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.13613767164027076487881255767029235747e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.46627172639536503825606138995804926378e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.26813757095977946534946955553296696736e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.01393212063713249666862633388902006492e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04428602119155661411061942866480445477e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.52977051350929618206095556763031195967e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63092013964238065197415324341392517794e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.77457116423818347179318334884304764609e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10372468210274291890669895933038762772e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85517152798650696598776156882211719502e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.01916800572423194619358228507804954863e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.72241483171311778625855302356391965266e-26), + }; + BOOST_MATH_STATIC const RealType Q[21] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.18341916009800042837726003154518652168e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19215655980509256344434487727207541208e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34380326549827252189214516628038733750e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.65069135930665131327262366757787760402e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74132905027750048531814627726862962404e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.11184893124573373947875834716323223477e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.68456853089572312034718359282699132364e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16340806625223749486884390838046244494e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45007766006724826837429360471785418874e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50443251593190111677537955057976277305e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30829464745241179175728900376502542995e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.57256894336319418553622695416919409120e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.89973763917908403951538315949652981312e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.05834675579622824896206540981508286215e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32721343511724613011656816221169980981e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.77569292346900432492044041866264215291e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39903737668944675386972393000746368518e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.23200083621376582032771041306045737695e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.43557805626692790539354751731913075096e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.91326410956582998375100191562832969140e-20), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -32) { + RealType t = -log2(ldexp(p, 16)); + + // Rational Approximation + // Maximum Relative Error: 9.1729e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[19] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.41419813138786928653984591611599949126e-1), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.94225020281693988785012368481961427155e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.28967134188573605597955859185818311256e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.16617725083935565014535265818666424029e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13379610773944032381149443514208866162e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06508483032198116332154635763926628153e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.89315471210589177037346413966039863126e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72993906450633221200844495419180873066e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.32883391567312244751716481903540505335e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.50998889887280885500990101116973130081e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.23247766687180294767338042555173653249e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.12326475887709255500757383109178584638e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11688319088825228685832870139320733695e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.95381542569360703428852622701723193645e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.70730387484749668293167350494151199659e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.84243405919322052861165273432136993833e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.40860917180131228318146854666419586211e-22), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.85122374200561402546731933480737679849e-30), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.79744248200459077556218062241428072826e-32), + }; + BOOST_MATH_STATIC const RealType Q[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.68930884381361438749954611436694811868e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.54944129151720429074748655153760118465e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.68493670923968273171437877298940102712e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.32109946297461941811102221103314572340e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.11983030120265263999033828442555862122e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.31204888358097171713697195034681853057e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.38548604936907265274059071726622071821e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.82158855359673890472124017801768455208e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78564556026252472894386810079914912632e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.46854501887011863360558947087254908412e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.67236380279070121978196383998000020645e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.20076045812548485396837897240357026254e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15814123143437217877762088763846289858e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.67177999717442465582949551415385496304e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71135001552136641449927514544850663366e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.98449056954034104266783180068258117013e-22), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -64) { + RealType t = -log2(ldexp(p, 32)); + + // Rational Approximation + // Maximum Relative Error: 1.8330e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[19] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.41392032051575981622151194498090952488e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
1.32651097995974052731414709779952524875e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.51927763729719814565225981452897995722e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.11148082477882981299945196621348531180e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.80457559655975695558885644380771202301e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96223001525552834934139567532649816367e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10625449265784963560596299595289620029e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.14785887121654524328854820350425279893e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.00832358736396150660417651391240544392e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.63128732906298604011217701767305935851e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.86148432181465165445355560568442172406e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.44088921565424320298916604159745842835e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.95220124898384051195673049864765987092e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50531060529388128674128631193212903032e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06119051130826148039530805693452156757e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19849873960405145967462029876325494393e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66833600176986734600260382043861669021e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.07829060832934383885234817363480653925e-26), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21485411177823993142696645934560017341e-40), + }; + BOOST_MATH_STATIC const RealType Q[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.88559444380290379529260819350179144435e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.37942717465159991856146428659881557553e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.11409915376157429952160202733757574026e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.21511733003564236929107862750700281202e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.74773232555012468159223116269289241483e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.24042271031862389840796415749527818562e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50790246845873571117791557191071320982e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.88274886666078071130557536971927872847e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.94242592538917360235050248151146832636e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.45262967284548223426004177385213311949e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30081806380053435857465845326686775489e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.62226426496757450797456131921060042081e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.40933140159573381494354127717542598424e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.03760580312376891985077265621432029857e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.43980683769941233230954109646012150124e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.88686274782816858372719510890126716148e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.07336140055510452905474533727353308321e-25), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -128) { + RealType t = -log2(ldexp(p, 64)); + + // Rational Approximation + // Maximum Relative Error: 5.9085e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.41392031627647840832213878541731833340e-1), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48256908849985263191468999842405689327e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.16515822909144946601084169745484248278e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.42246334265547596187501472291026180697e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.54145961608971551335283437288203286104e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.64840354062369555376354747633807898689e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.38246669464526050793398379055335943951e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29684566081664150074215568847731661446e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.98456331768093420851844051941851740455e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36738267296531031235518935656891979319e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.08128287278026286279504717089979753319e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.38334581618709868951669630969696873534e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.08478537820365448038773095902465198679e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30768169494950935152733510713679558562e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.49254243621461466892836128222648688091e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.17026357413798368802986708112771803774e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.05630817682870951728748696694117980745e-22), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.13881361534205323565985756195674181203e-50), + }; + BOOST_MATH_STATIC const RealType Q[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.34271731953273239599863811873205236246e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.27133013035186849060586077266046297964e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29542078693828543540010668640353491847e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33027698228265344545932885863767276804e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06868444562964057780556916100143215394e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.97868278672593071061800234869603536243e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.79869926850283188735312536038469293739e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.75298857713475428365153491580710497759e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.93449891515741631851202042430818496480e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36715626731277089013724968542144140938e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.98125789528264426869121548546848968670e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78234546049400950521459021508632294206e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.83044000387150792643468853129175805308e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.30111486296552039388613073915170671881e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.28628462422858134962149154420358876352e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48108558735886480279744474396456699335e-21), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else { + const BOOST_MATH_STATIC_LOCAL_VARIABLE RealType c = ldexp(cbrt(constants::pi()), 1); + + RealType p_square = p * p; + + if ((boost::math::isnormal)(p_square)) { + result = 1 / (cbrt(p_square) * c); + } + else if (p > 0) { + result = 1 / (cbrt(p) * cbrt(p) * c); + } + else { + result = boost::math::numeric_limits::infinity(); + } + } + + return result; +} + +template +BOOST_MATH_GPU_ENABLED inline RealType 
holtsmark_quantile_imp_prec(const RealType& p, bool complement, const boost::math::integral_constant<int, 53>& tag)
+{
+    if (p > 0.5) {
+        return !complement ? holtsmark_quantile_upper_imp_prec(1 - p, tag) : -holtsmark_quantile_upper_imp_prec(1 - p, tag);
+    }
+
+    return complement ? holtsmark_quantile_upper_imp_prec(p, tag) : -holtsmark_quantile_upper_imp_prec(p, tag);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_quantile_imp_prec(const RealType& p, bool complement, const boost::math::integral_constant<int, 113>& tag)
+{
+    if (p > 0.5) {
+        return !complement ? holtsmark_quantile_upper_imp_prec(1 - p, tag) : -holtsmark_quantile_upper_imp_prec(1 - p, tag);
+    }
+
+    return complement ? holtsmark_quantile_upper_imp_prec(p, tag) : -holtsmark_quantile_upper_imp_prec(p, tag);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_quantile_imp(const holtsmark_distribution<RealType, Policy>& dist, const RealType& p, bool complement)
+{
+    // This routine implements the quantile for the Holtsmark distribution;
+    // the value p may be the probability, or its complement if complement=true.
+
+    constexpr auto function = "boost::math::quantile(holtsmark<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+    RealType location = dist.location();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_probability(function, p, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Holtsmark distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats).");
+
+    result = location + scale * holtsmark_quantile_imp_prec(p, complement, tag_type());
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_entropy_imp_prec(const boost::math::integral_constant<int, 53>&)
+{
+    return static_cast<RealType>(2.06944850513462440032);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_entropy_imp_prec(const boost::math::integral_constant<int, 113>&)
+{
+    return BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.0694485051346244003155800384542166381);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_entropy_imp(const holtsmark_distribution<RealType, Policy>& dist)
+{
+    // This implements the entropy for the Holtsmark distribution.
+
+    constexpr auto function = "boost::math::entropy(holtsmark<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Holtsmark distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats).");
+
+    result = holtsmark_entropy_imp_prec<RealType>(tag_type()) + log(scale);
+
+    return result;
+}
+
+} // detail
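// --- Illustrative aside (reviewer note, not part of the patch) --------------
// The detail functions above select the 53-bit or 113-bit rational
// approximation at compile time via an integral_constant tag computed from the
// policy's precision. A minimal standalone analogue of that dispatch pattern;
// the names eval/eval_impl are hypothetical and stand in for the
// holtsmark_*_imp_prec overloads:

#include <limits>
#include <type_traits>

template <typename Real>
Real eval_impl(Real x, std::integral_constant<int, 53>)  { return x; } // would use the double-precision tables
template <typename Real>
Real eval_impl(Real x, std::integral_constant<int, 113>) { return x; } // would use the quad-precision tables

template <typename Real>
Real eval(Real x)
{
    static_assert(std::numeric_limits<Real>::digits <= 113, "only 53- and 113-bit mantissas are covered");
    using tag = std::integral_constant<int, (std::numeric_limits<Real>::digits <= 53) ? 53 : 113>;
    return eval_impl(x, tag{}); // overload resolution picks the table set at compile time
}
// -----------------------------------------------------------------------------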
+template <typename RealType = double, typename Policy = policies::policy<> >
+class holtsmark_distribution
+{
+    public:
+    typedef RealType value_type;
+    typedef Policy policy_type;
+
+    BOOST_MATH_GPU_ENABLED holtsmark_distribution(RealType l_location = 0, RealType l_scale = 1)
+        : mu(l_location), c(l_scale)
+    {
+        constexpr auto function = "boost::math::holtsmark_distribution<%1%>::holtsmark_distribution";
+        RealType result = 0;
+        detail::check_location(function, l_location, &result, Policy());
+        detail::check_scale(function, l_scale, &result, Policy());
+    } // holtsmark_distribution
+
+    BOOST_MATH_GPU_ENABLED RealType location()const
+    {
+        return mu;
+    }
+    BOOST_MATH_GPU_ENABLED RealType scale()const
+    {
+        return c;
+    }
+
+    private:
+    RealType mu; // The location parameter.
+    RealType c;  // The scale parameter.
+};
+
+typedef holtsmark_distribution<double> holtsmark;
+
+#ifdef __cpp_deduction_guides
+template <class RealType>
+holtsmark_distribution(RealType) -> holtsmark_distribution<typename boost::math::tools::promote_args<RealType>::type>;
+template <class RealType>
+holtsmark_distribution(RealType, RealType) -> holtsmark_distribution<typename boost::math::tools::promote_args<RealType>::type>;
+#endif
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const holtsmark_distribution<RealType, Policy>&)
+{ // Range of permissible values for random variable x.
+    BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
+    {
+        return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
+    }
+    else
+    { // Can only use max_value.
+        using boost::math::tools::max_value;
+        return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max.
+    }
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const holtsmark_distribution<RealType, Policy>&)
+{ // Range of supported values for random variable x.
+    // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
+    BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
+    {
+        return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
+    }
+    else
+    { // Can only use max_value.
+        using boost::math::tools::max_value;
+        return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max.
+    }
+}
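// --- Illustrative aside (reviewer note, not part of the patch) --------------
// With the deduction guides and the convenience typedef above, all three
// declarations below name holtsmark_distribution<double>. A sketch, assuming
// the new header lands at boost/math/distributions/holtsmark.hpp:

#include <boost/math/distributions/holtsmark.hpp>

void deduction_demo()
{
    boost::math::holtsmark_distribution<double> a(0.0, 2.0); // explicit template argument
    boost::math::holtsmark_distribution b(0.0, 2.0);         // CTAD via the guides above
    boost::math::holtsmark c(0.0, 2.0);                      // the typedef above
    (void)a; (void)b; (void)c;
}
// -----------------------------------------------------------------------------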
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const holtsmark_distribution<RealType, Policy>& dist, const RealType& x)
+{
+    return detail::holtsmark_pdf_imp(dist, x);
+} // pdf
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const holtsmark_distribution<RealType, Policy>& dist, const RealType& x)
+{
+    return detail::holtsmark_cdf_imp(dist, x, false);
+} // cdf
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const holtsmark_distribution<RealType, Policy>& dist, const RealType& p)
+{
+    return detail::holtsmark_quantile_imp(dist, p, false);
+} // quantile
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<holtsmark_distribution<RealType, Policy>, RealType>& c)
+{
+    return detail::holtsmark_cdf_imp(c.dist, c.param, true);
+} // cdf complement
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<holtsmark_distribution<RealType, Policy>, RealType>& c)
+{
+    return detail::holtsmark_quantile_imp(c.dist, c.param, true);
+} // quantile complement
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mean(const holtsmark_distribution<RealType, Policy>& dist)
+{
+    return dist.location();
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType variance(const holtsmark_distribution<RealType, Policy>& /*dist*/)
+{
+    return boost::math::numeric_limits<RealType>::infinity();
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mode(const holtsmark_distribution<RealType, Policy>& dist)
+{
+    return dist.location();
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType median(const holtsmark_distribution<RealType, Policy>& dist)
+{
+    return dist.location();
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const holtsmark_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no skewness:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The Holtsmark Distribution has no skewness");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::skewness(holtsmark<%1%>&)",
+        "The Holtsmark distribution does not have a skewness: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy()); // infinity?
+}
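// --- Illustrative aside (reviewer note, not part of the patch) --------------
// The complemented overloads above let callers take upper-tail probabilities
// directly, avoiding the cancellation that 1 - cdf(dist, x) suffers when
// cdf(dist, x) is close to 1. A sketch of the intended call pattern:

#include <boost/math/distributions/holtsmark.hpp>

double upper_tail_demo(double x)
{
    boost::math::holtsmark dist(0.0, 1.0);
    double tail = cdf(complement(dist, x));  // P(X > x), evaluated without forming 1 - cdf
    return quantile(complement(dist, tail)); // round-trips to approximately x
}
// -----------------------------------------------------------------------------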
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const holtsmark_distribution<RealType, Policy>& /*dist*/)
+{
+   // There is no kurtosis:
+   typedef typename Policy::assert_undefined_type assert_type;
+   static_assert(assert_type::value == 0, "The Holtsmark Distribution has no kurtosis");
+
+   return policies::raise_domain_error<RealType>(
+      "boost::math::kurtosis(holtsmark<%1%>&)",
+      "The Holtsmark distribution does not have a kurtosis: "
+      "the only possible return value is %1%.",
+      boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const holtsmark_distribution<RealType, Policy>& /*dist*/)
+{
+   // There is no kurtosis excess:
+   typedef typename Policy::assert_undefined_type assert_type;
+   static_assert(assert_type::value == 0, "The Holtsmark Distribution has no kurtosis excess");
+
+   return policies::raise_domain_error<RealType>(
+      "boost::math::kurtosis_excess(holtsmark<%1%>&)",
+      "The Holtsmark distribution does not have a kurtosis excess: "
+      "the only possible return value is %1%.",
+      boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType entropy(const holtsmark_distribution<RealType, Policy>& dist)
+{
+   return detail::holtsmark_entropy_imp(dist);
+}
+
+}} // namespaces
+
+
+#endif // BOOST_STATS_HOLTSMARK_HPP
diff --git a/include/boost/math/distributions/inverse_chi_squared.hpp b/include/boost/math/distributions/inverse_chi_squared.hpp
index 19dd0371e8..1a3c680d23 100644
--- a/include/boost/math/distributions/inverse_chi_squared.hpp
+++ b/include/boost/math/distributions/inverse_chi_squared.hpp
@@ -1,6 +1,6 @@
 // Copyright John Maddock 2010.
 // Copyright Paul A. Bristow 2010.
-
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -9,6 +9,8 @@
 #ifndef BOOST_MATH_DISTRIBUTIONS_INVERSE_CHI_SQUARED_HPP
 #define BOOST_MATH_DISTRIBUTIONS_INVERSE_CHI_SQUARED_HPP
 
+#include
+#include
 #include
 #include // for incomplete beta.
 #include // for complements.
@@ -24,14 +26,12 @@
 // Weisstein, Eric W. "Inverse Chi-Squared Distribution." From MathWorld--A Wolfram Web Resource.
 // http://mathworld.wolfram.com/InverseChi-SquaredDistribution.html
 
-#include
-
 namespace boost{ namespace math{
 
 namespace detail
 {
   template <class RealType, class Policy>
-  inline bool check_inverse_chi_squared( // Check both distribution parameters.
+  BOOST_MATH_GPU_ENABLED inline bool check_inverse_chi_squared( // Check both distribution parameters.
       const char* function,
       RealType degrees_of_freedom, // degrees_of_freedom (aka nu).
       RealType scale, // scale (aka sigma^2)
@@ -51,7 +51,7 @@ class inverse_chi_squared_distribution
    typedef RealType value_type;
    typedef Policy policy_type;
 
-   inverse_chi_squared_distribution(RealType df, RealType l_scale) : m_df(df), m_scale (l_scale)
+   BOOST_MATH_GPU_ENABLED inverse_chi_squared_distribution(RealType df, RealType l_scale) : m_df(df), m_scale (l_scale)
    {
      RealType result;
      detail::check_df(
@@ -62,7 +62,7 @@ class inverse_chi_squared_distribution
        m_scale, &result, Policy());
    } // inverse_chi_squared_distribution constructor
 
-   inverse_chi_squared_distribution(RealType df = 1) : m_df(df)
+   BOOST_MATH_GPU_ENABLED inverse_chi_squared_distribution(RealType df = 1) : m_df(df)
    {
      RealType result;
      m_scale = 1 / m_df ; // Default scale = 1 / degrees of freedom (Wikipedia definition 1).
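
[Editor's note: the hunks above and below all follow one mechanical recipe: annotate member and free functions with BOOST_MATH_GPU_ENABLED, turn `static const char*` error-message names into `constexpr auto`, and swap `std::` facilities (pair, numeric_limits, uintmax_t) for their `boost::math::` portable counterparts. On a host build the annotation compiles away, so the public API is unchanged. A minimal smoke-test sketch, assuming the usual include paths; this example is not part of the patch:

#include <boost/math/distributions/holtsmark.hpp>
#include <boost/math/distributions/inverse_chi_squared.hpp>
#include <iostream>

int main()
{
    boost::math::holtsmark h(0.0, 1.0);         // location, scale
    boost::math::inverse_chi_squared ics(10.0); // degrees of freedom, default scale 1/df

    std::cout << pdf(h, 0.5) << '\n'                // Holtsmark density at x = 0.5
              << quantile(ics, 0.95) << '\n'        // 95% quantile
              << cdf(complement(ics, 2.0)) << '\n'; // upper tail P(X > 2)
}

The free functions are found by argument-dependent lookup, exactly as before the patch.]
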
@@ -71,11 +71,11 @@ class inverse_chi_squared_distribution m_df, &result, Policy()); } // inverse_chi_squared_distribution - RealType degrees_of_freedom()const + BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom()const { return m_df; // aka nu } - RealType scale()const + BOOST_MATH_GPU_ENABLED RealType scale()const { return m_scale; // aka xi } @@ -105,28 +105,28 @@ inverse_chi_squared_distribution(RealType,RealType)->inverse_chi_squared_distrib #endif template -inline const std::pair range(const inverse_chi_squared_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline const boost::math::pair range(const inverse_chi_squared_distribution& /*dist*/) { // Range of permissible values for random variable x. using boost::math::tools::max_value; - return std::pair(static_cast(0), max_value()); // 0 to + infinity. + return boost::math::pair(static_cast(0), max_value()); // 0 to + infinity. } template -inline const std::pair support(const inverse_chi_squared_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline const boost::math::pair support(const inverse_chi_squared_distribution& /*dist*/) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. - return std::pair(static_cast(0), tools::max_value()); // 0 to + infinity. + return boost::math::pair(static_cast(0), tools::max_value()); // 0 to + infinity. } template -RealType pdf(const inverse_chi_squared_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED RealType pdf(const inverse_chi_squared_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions. RealType df = dist.degrees_of_freedom(); RealType scale = dist.scale(); RealType error_result; - static const char* function = "boost::math::pdf(const inverse_chi_squared_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::pdf(const inverse_chi_squared_distribution<%1%>&, %1%)"; if(false == detail::check_inverse_chi_squared (function, df, scale, &error_result, Policy()) @@ -159,9 +159,9 @@ RealType pdf(const inverse_chi_squared_distribution& dist, con } // pdf template -inline RealType cdf(const inverse_chi_squared_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const inverse_chi_squared_distribution& dist, const RealType& x) { - static const char* function = "boost::math::cdf(const inverse_chi_squared_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const inverse_chi_squared_distribution<%1%>&, %1%)"; RealType df = dist.degrees_of_freedom(); RealType scale = dist.scale(); RealType error_result; @@ -188,13 +188,13 @@ inline RealType cdf(const inverse_chi_squared_distribution& di } // cdf template -inline RealType quantile(const inverse_chi_squared_distribution& dist, const RealType& p) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const inverse_chi_squared_distribution& dist, const RealType& p) { using boost::math::gamma_q_inv; RealType df = dist.degrees_of_freedom(); RealType scale = dist.scale(); - static const char* function = "boost::math::quantile(const inverse_chi_squared_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const inverse_chi_squared_distribution<%1%>&, %1%)"; // Error check: RealType error_result; if(false == detail::check_df( @@ -220,13 +220,13 @@ inline RealType quantile(const inverse_chi_squared_distribution -inline RealType cdf(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { 
using boost::math::gamma_q_inv; RealType const& df = c.dist.degrees_of_freedom(); RealType const& scale = c.dist.scale(); RealType const& x = c.param; - static const char* function = "boost::math::cdf(const inverse_chi_squared_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const inverse_chi_squared_distribution<%1%>&, %1%)"; // Error check: RealType error_result; if(false == detail::check_df( @@ -251,14 +251,14 @@ inline RealType cdf(const complemented2_type -inline RealType quantile(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { using boost::math::gamma_q_inv; RealType const& df = c.dist.degrees_of_freedom(); RealType const& scale = c.dist.scale(); RealType const& q = c.param; - static const char* function = "boost::math::quantile(const inverse_chi_squared_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const inverse_chi_squared_distribution<%1%>&, %1%)"; // Error check: RealType error_result; if(false == detail::check_df(function, df, &error_result, Policy())) @@ -280,12 +280,12 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const inverse_chi_squared_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mean(const inverse_chi_squared_distribution& dist) { // Mean of inverse Chi-Squared distribution. RealType df = dist.degrees_of_freedom(); RealType scale = dist.scale(); - static const char* function = "boost::math::mean(const inverse_chi_squared_distribution<%1%>&)"; + constexpr auto function = "boost::math::mean(const inverse_chi_squared_distribution<%1%>&)"; if(df <= 2) return policies::raise_domain_error( function, @@ -295,11 +295,11 @@ inline RealType mean(const inverse_chi_squared_distribution& d } // mean template -inline RealType variance(const inverse_chi_squared_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType variance(const inverse_chi_squared_distribution& dist) { // Variance of inverse Chi-Squared distribution. RealType df = dist.degrees_of_freedom(); RealType scale = dist.scale(); - static const char* function = "boost::math::variance(const inverse_chi_squared_distribution<%1%>&)"; + constexpr auto function = "boost::math::variance(const inverse_chi_squared_distribution<%1%>&)"; if(df <= 4) { return policies::raise_domain_error( @@ -311,14 +311,14 @@ inline RealType variance(const inverse_chi_squared_distribution -inline RealType mode(const inverse_chi_squared_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mode(const inverse_chi_squared_distribution& dist) { // mode is not defined in Mathematica. // See Discussion section http://en.wikipedia.org/wiki/Talk:Scaled-inverse-chi-square_distribution // for origin of the formula used below. RealType df = dist.degrees_of_freedom(); RealType scale = dist.scale(); - static const char* function = "boost::math::mode(const inverse_chi_squared_distribution<%1%>&)"; + constexpr auto function = "boost::math::mode(const inverse_chi_squared_distribution<%1%>&)"; if(df < 0) return policies::raise_domain_error( function, @@ -341,11 +341,11 @@ inline RealType mode(const inverse_chi_squared_distribution& d // Now implemented via quantile(half) in derived accessors. 
 template <class RealType, class Policy>
-inline RealType skewness(const inverse_chi_squared_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const inverse_chi_squared_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING // For ADL
    RealType df = dist.degrees_of_freedom();
-   static const char* function = "boost::math::skewness(const inverse_chi_squared_distribution<%1%>&)";
+   constexpr auto function = "boost::math::skewness(const inverse_chi_squared_distribution<%1%>&)";
    if(df <= 6)
       return policies::raise_domain_error<RealType>(
          function,
@@ -356,10 +356,10 @@ inline RealType skewness(const inverse_chi_squared_distribution<RealType, Policy>& dist)
 
 template <class RealType, class Policy>
-inline RealType kurtosis(const inverse_chi_squared_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const inverse_chi_squared_distribution<RealType, Policy>& dist)
 {
    RealType df = dist.degrees_of_freedom();
-   static const char* function = "boost::math::kurtosis(const inverse_chi_squared_distribution<%1%>&)";
+   constexpr auto function = "boost::math::kurtosis(const inverse_chi_squared_distribution<%1%>&)";
    if(df <= 8)
       return policies::raise_domain_error<RealType>(
          function,
@@ -370,10 +370,10 @@ inline RealType kurtosis(const inverse_chi_squared_distribution<RealType, Policy>& dist)
 
 template <class RealType, class Policy>
-inline RealType kurtosis_excess(const inverse_chi_squared_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const inverse_chi_squared_distribution<RealType, Policy>& dist)
 {
    RealType df = dist.degrees_of_freedom();
-   static const char* function = "boost::math::kurtosis(const inverse_chi_squared_distribution<%1%>&)";
+   constexpr auto function = "boost::math::kurtosis_excess(const inverse_chi_squared_distribution<%1%>&)";
    if(df <= 8)
       return policies::raise_domain_error<RealType>(
          function,
diff --git a/include/boost/math/distributions/inverse_gamma.hpp b/include/boost/math/distributions/inverse_gamma.hpp
index 8c9e4763d5..6aa798ed82 100644
--- a/include/boost/math/distributions/inverse_gamma.hpp
+++ b/include/boost/math/distributions/inverse_gamma.hpp
@@ -2,6 +2,7 @@
 
 // Copyright Paul A. Bristow 2010.
 // Copyright John Maddock 2010.
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -22,21 +23,21 @@
 // http://mathworld.wolfram.com/GammaDistribution.html
 // http://en.wikipedia.org/wiki/Gamma_distribution
 
+#include
+#include
+#include
 #include
 #include
 #include
 #include
-#include
-#include
-
 namespace boost{ namespace math
 {
 namespace detail
 {
 
 template <class RealType, class Policy>
-inline bool check_inverse_gamma_shape(
+BOOST_MATH_GPU_ENABLED inline bool check_inverse_gamma_shape(
      const char* function, // inverse_gamma
      RealType shape, // shape aka alpha
      RealType* result, // to update, perhaps with NaN
@@ -57,7 +58,7 @@ inline bool check_inverse_gamma_shape(
 } //bool check_inverse_gamma_shape
 
 template <class RealType, class Policy>
-inline bool check_inverse_gamma_x(
+BOOST_MATH_GPU_ENABLED inline bool check_inverse_gamma_x(
      const char* function,
      RealType const& x,
      RealType* result, const Policy& pol)
@@ -73,7 +74,7 @@ inline bool check_inverse_gamma_x(
 }
 
 template <class RealType, class Policy>
-inline bool check_inverse_gamma(
+BOOST_MATH_GPU_ENABLED inline bool check_inverse_gamma(
     const char* function, // TODO swap these over, so shape is first.
RealType scale, // scale aka beta RealType shape, // shape aka alpha @@ -92,7 +93,7 @@ class inverse_gamma_distribution using value_type = RealType; using policy_type = Policy; - explicit inverse_gamma_distribution(RealType l_shape = 1, RealType l_scale = 1) + BOOST_MATH_GPU_ENABLED explicit inverse_gamma_distribution(RealType l_shape = 1, RealType l_scale = 1) : m_shape(l_shape), m_scale(l_scale) { RealType result; @@ -101,12 +102,12 @@ class inverse_gamma_distribution l_scale, l_shape, &result, Policy()); } - RealType shape()const + BOOST_MATH_GPU_ENABLED RealType shape()const { return m_shape; } - RealType scale()const + BOOST_MATH_GPU_ENABLED RealType scale()const { return m_scale; } @@ -132,27 +133,27 @@ inverse_gamma_distribution(RealType,RealType)->inverse_gamma_distribution -inline std::pair range(const inverse_gamma_distribution& /* dist */) +BOOST_MATH_GPU_ENABLED inline boost::math::pair range(const inverse_gamma_distribution& /* dist */) { // Range of permissible values for random variable x. using boost::math::tools::max_value; - return std::pair(static_cast(0), max_value()); + return boost::math::pair(static_cast(0), max_value()); } template -inline std::pair support(const inverse_gamma_distribution& /* dist */) +BOOST_MATH_GPU_ENABLED inline boost::math::pair support(const inverse_gamma_distribution& /* dist */) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. using boost::math::tools::max_value; using boost::math::tools::min_value; - return std::pair(static_cast(0), max_value()); + return boost::math::pair(static_cast(0), max_value()); } template -inline RealType pdf(const inverse_gamma_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType pdf(const inverse_gamma_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::pdf(const inverse_gamma_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::pdf(const inverse_gamma_distribution<%1%>&, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -195,17 +196,17 @@ inline RealType pdf(const inverse_gamma_distribution& dist, co } // pdf template -inline RealType logpdf(const inverse_gamma_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType logpdf(const inverse_gamma_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions using boost::math::lgamma; - static const char* function = "boost::math::logpdf(const inverse_gamma_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::logpdf(const inverse_gamma_distribution<%1%>&, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); - RealType result = -std::numeric_limits::infinity(); + RealType result = -boost::math::numeric_limits::infinity(); if(false == detail::check_inverse_gamma(function, scale, shape, &result, Policy())) { // distribution parameters bad. 
return result; @@ -232,11 +233,11 @@ inline RealType logpdf(const inverse_gamma_distribution& dist, } // pdf template -inline RealType cdf(const inverse_gamma_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const inverse_gamma_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(const inverse_gamma_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const inverse_gamma_distribution<%1%>&, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -260,12 +261,12 @@ inline RealType cdf(const inverse_gamma_distribution& dist, co } // cdf template -inline RealType quantile(const inverse_gamma_distribution& dist, const RealType& p) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const inverse_gamma_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std functions using boost::math::gamma_q_inv; - static const char* function = "boost::math::quantile(const inverse_gamma_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const inverse_gamma_distribution<%1%>&, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -287,11 +288,11 @@ inline RealType quantile(const inverse_gamma_distribution& dis } template -inline RealType cdf(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)"; RealType shape = c.dist.shape(); RealType scale = c.dist.scale(); @@ -310,11 +311,11 @@ inline RealType cdf(const complemented2_type -inline RealType quantile(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const inverse_gamma_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const inverse_gamma_distribution<%1%>&, %1%)"; RealType shape = c.dist.shape(); RealType scale = c.dist.scale(); @@ -338,11 +339,11 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const inverse_gamma_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mean(const inverse_gamma_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::mean(const inverse_gamma_distribution<%1%>&)"; + constexpr auto function = "boost::math::mean(const inverse_gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -365,11 +366,11 @@ inline RealType mean(const inverse_gamma_distribution& dist) } // mean template -inline RealType variance(const inverse_gamma_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType variance(const inverse_gamma_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::variance(const inverse_gamma_distribution<%1%>&)"; + constexpr auto function = "boost::math::variance(const inverse_gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -391,11 +392,11 @@ inline RealType variance(const inverse_gamma_distribution& dis } template -inline RealType mode(const inverse_gamma_distribution& 
dist) +BOOST_MATH_GPU_ENABLED inline RealType mode(const inverse_gamma_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::mode(const inverse_gamma_distribution<%1%>&)"; + constexpr auto function = "boost::math::mode(const inverse_gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -418,11 +419,11 @@ inline RealType mode(const inverse_gamma_distribution& dist) //} template -inline RealType skewness(const inverse_gamma_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType skewness(const inverse_gamma_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::skewness(const inverse_gamma_distribution<%1%>&)"; + constexpr auto function = "boost::math::skewness(const inverse_gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -444,11 +445,11 @@ inline RealType skewness(const inverse_gamma_distribution& dis } template -inline RealType kurtosis_excess(const inverse_gamma_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const inverse_gamma_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::kurtosis_excess(const inverse_gamma_distribution<%1%>&)"; + constexpr auto function = "boost::math::kurtosis_excess(const inverse_gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -470,9 +471,9 @@ inline RealType kurtosis_excess(const inverse_gamma_distribution -inline RealType kurtosis(const inverse_gamma_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const inverse_gamma_distribution& dist) { - static const char* function = "boost::math::kurtosis(const inverse_gamma_distribution<%1%>&)"; + constexpr auto function = "boost::math::kurtosis(const inverse_gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); diff --git a/include/boost/math/distributions/inverse_gaussian.hpp b/include/boost/math/distributions/inverse_gaussian.hpp index b31d1c9257..20d3b6bdd5 100644 --- a/include/boost/math/distributions/inverse_gaussian.hpp +++ b/include/boost/math/distributions/inverse_gaussian.hpp @@ -1,6 +1,6 @@ // Copyright John Maddock 2010. // Copyright Paul A. Bristow 2010. - +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -49,17 +49,17 @@ // http://www.statsci.org/s/inverse_gaussian.s and http://www.statsci.org/s/inverse_gaussian.html -//#include +#include +#include #include // for erf/erfc. #include #include #include #include // for gamma function - #include #include - -#include +#include +#include namespace boost{ namespace math{ @@ -70,10 +70,10 @@ class inverse_gaussian_distribution using value_type = RealType; using policy_type = Policy; - explicit inverse_gaussian_distribution(RealType l_mean = 1, RealType l_scale = 1) + BOOST_MATH_GPU_ENABLED explicit inverse_gaussian_distribution(RealType l_mean = 1, RealType l_scale = 1) : m_mean(l_mean), m_scale(l_scale) { // Default is a 1,1 inverse_gaussian distribution. 
- static const char* function = "boost::math::inverse_gaussian_distribution<%1%>::inverse_gaussian_distribution"; + constexpr auto function = "boost::math::inverse_gaussian_distribution<%1%>::inverse_gaussian_distribution"; RealType result; detail::check_scale(function, l_scale, &result, Policy()); @@ -81,22 +81,22 @@ class inverse_gaussian_distribution detail::check_x_gt0(function, l_mean, &result, Policy()); } - RealType mean()const + BOOST_MATH_GPU_ENABLED RealType mean()const { // alias for location. return m_mean; // aka mu } // Synonyms, provided to allow generic use of find_location and find_scale. - RealType location()const + BOOST_MATH_GPU_ENABLED RealType location()const { // location, aka mu. return m_mean; } - RealType scale()const + BOOST_MATH_GPU_ENABLED RealType scale()const { // scale, aka lambda. return m_scale; } - RealType shape()const + BOOST_MATH_GPU_ENABLED RealType shape()const { // shape, aka phi = lambda/mu. return m_scale / m_mean; } @@ -119,29 +119,29 @@ inverse_gaussian_distribution(RealType,RealType)->inverse_gaussian_distribution< #endif template -inline std::pair range(const inverse_gaussian_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline boost::math::pair range(const inverse_gaussian_distribution& /*dist*/) { // Range of permissible values for random variable x, zero to max. using boost::math::tools::max_value; - return std::pair(static_cast(0.), max_value()); // - to + max value. + return boost::math::pair(static_cast(0.), max_value()); // - to + max value. } template -inline std::pair support(const inverse_gaussian_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline boost::math::pair support(const inverse_gaussian_distribution& /*dist*/) { // Range of supported values for random variable x, zero to max. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. using boost::math::tools::max_value; - return std::pair(static_cast(0.), max_value()); // - to + max value. + return boost::math::pair(static_cast(0.), max_value()); // - to + max value. 
 }
 
 template <class RealType, class Policy>
-inline RealType pdf(const inverse_gaussian_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const inverse_gaussian_distribution<RealType, Policy>& dist, const RealType& x)
 { // Probability Density Function
    BOOST_MATH_STD_USING // for ADL of std functions
    RealType scale = dist.scale();
    RealType mean = dist.mean();
    RealType result = 0;
-   static const char* function = "boost::math::pdf(const inverse_gaussian_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::pdf(const inverse_gaussian_distribution<%1%>&, %1%)";
    if(false == detail::check_scale(function, scale, &result, Policy()))
    {
       return result;
@@ -171,14 +171,14 @@ inline RealType pdf(const inverse_gaussian_distribution<RealType, Policy>& dist,
 } // pdf
 
 template <class RealType, class Policy>
-inline RealType logpdf(const inverse_gaussian_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType logpdf(const inverse_gaussian_distribution<RealType, Policy>& dist, const RealType& x)
 { // Log of the Probability Density Function
    BOOST_MATH_STD_USING // for ADL of std functions
    RealType scale = dist.scale();
    RealType mean = dist.mean();
-   RealType result = -std::numeric_limits<RealType>::infinity();
-   static const char* function = "boost::math::logpdf(const inverse_gaussian_distribution<%1%>&, %1%)";
+   RealType result = -boost::math::numeric_limits<RealType>::infinity();
+   constexpr auto function = "boost::math::logpdf(const inverse_gaussian_distribution<%1%>&, %1%)";
    if(false == detail::check_scale(function, scale, &result, Policy()))
    {
       return result;
@@ -198,7 +198,7 @@ inline RealType logpdf(const inverse_gaussian_distribution<RealType, Policy>& dist,
 
    if (x == 0)
    {
-      return std::numeric_limits<RealType>::quiet_NaN(); // Convenient, even if not defined mathematically. log(0)
+      return boost::math::numeric_limits<RealType>::quiet_NaN(); // Convenient, even if not defined mathematically. log(0)
    }
 
    const RealType two_pi = boost::math::constants::two_pi<RealType>();
@@ -208,13 +208,13 @@ inline RealType logpdf(const inverse_gaussian_distribution<RealType, Policy>& dist,
 } // logpdf
 
 template <class RealType, class Policy>
-inline RealType cdf(const inverse_gaussian_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const inverse_gaussian_distribution<RealType, Policy>& dist, const RealType& x)
 { // Cumulative Distribution Function.
    BOOST_MATH_STD_USING // for ADL of std functions.
    RealType scale = dist.scale();
    RealType mean = dist.mean();
-   static const char* function = "boost::math::cdf(const inverse_gaussian_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(const inverse_gaussian_distribution<%1%>&, %1%)";
    RealType result = 0;
    if(false == detail::check_scale(function, scale, &result, Policy()))
    {
@@ -257,11 +257,11 @@
 template <class RealType, class Policy>
 struct inverse_gaussian_quantile_functor
 {
-   inverse_gaussian_quantile_functor(const boost::math::inverse_gaussian_distribution<RealType, Policy> dist, RealType const& p)
+   BOOST_MATH_GPU_ENABLED inverse_gaussian_quantile_functor(const boost::math::inverse_gaussian_distribution<RealType, Policy> dist, RealType const& p)
      : distribution(dist), prob(p)
    {
    }
-   boost::math::tuple<RealType, RealType> operator()(RealType const& x)
+   BOOST_MATH_GPU_ENABLED boost::math::tuple<RealType, RealType> operator()(RealType const& x)
    {
      RealType c = cdf(distribution, x);
      RealType fx = c - prob; // Difference cdf - value - to minimize.
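
[Editor's note: the functors above drive the derivative-based root finder used by quantile(): operator() returns the pair (cdf(x) - p, pdf(x)), i.e. the objective and its derivative. A minimal host-side sketch of that same pattern, with an illustrative guess and bracket rather than the library's internal guess_ig logic; this example is not part of the patch:

#include <boost/math/distributions/inverse_gaussian.hpp>
#include <boost/math/tools/roots.hpp>
#include <cstdint>
#include <iostream>
#include <limits>
#include <utility>

int main()
{
    const boost::math::inverse_gaussian dist(1.0, 2.0); // mean mu, scale lambda
    const double p = 0.75;

    // (objective, derivative) pair consumed by newton_raphson_iterate.
    auto f = [&](double x) {
        return std::make_pair(cdf(dist, x) - p, pdf(dist, x));
    };

    std::uintmax_t max_iter = 50;
    const double x = boost::math::tools::newton_raphson_iterate(
        f, 1.0 /*guess*/, 1e-8 /*min*/, 10.0 /*max*/,
        std::numeric_limits<double>::digits, max_iter);

    std::cout << x << " vs " << quantile(dist, p) << '\n'; // should agree
}

Bounding the iteration with min/max keeps Newton steps inside the (0, +inf) support even when the derivative is small in the tails.]
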
@@ -277,11 +277,11 @@ struct inverse_gaussian_quantile_functor template struct inverse_gaussian_quantile_complement_functor { - inverse_gaussian_quantile_complement_functor(const boost::math::inverse_gaussian_distribution dist, RealType const& p) + BOOST_MATH_GPU_ENABLED inverse_gaussian_quantile_complement_functor(const boost::math::inverse_gaussian_distribution dist, RealType const& p) : distribution(dist), prob(p) { } - boost::math::tuple operator()(RealType const& x) + BOOST_MATH_GPU_ENABLED boost::math::tuple operator()(RealType const& x) { RealType c = cdf(complement(distribution, x)); RealType fx = c - prob; // Difference cdf - value - to minimize. @@ -298,7 +298,7 @@ struct inverse_gaussian_quantile_complement_functor namespace detail { template - inline RealType guess_ig(RealType p, RealType mu = 1, RealType lambda = 1) + BOOST_MATH_GPU_ENABLED inline RealType guess_ig(RealType p, RealType mu = 1, RealType lambda = 1) { // guess at random variate value x for inverse gaussian quantile. BOOST_MATH_STD_USING using boost::math::policies::policy; @@ -350,14 +350,14 @@ namespace detail } // namespace detail template -inline RealType quantile(const inverse_gaussian_distribution& dist, const RealType& p) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const inverse_gaussian_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std functions. // No closed form exists so guess and use Newton Raphson iteration. RealType mean = dist.mean(); RealType scale = dist.scale(); - static const char* function = "boost::math::quantile(const inverse_gaussian_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const inverse_gaussian_distribution<%1%>&, %1%)"; RealType result = 0; if(false == detail::check_scale(function, scale, &result, Policy())) @@ -388,7 +388,7 @@ inline RealType quantile(const inverse_gaussian_distribution& // digits used to control how accurate to try to make the result. // To allow user to control accuracy versus speed, int get_digits = policies::digits();// get digits from policy, - std::uintmax_t max_iter = policies::get_max_root_iterations(); // and max iterations. + boost::math::uintmax_t max_iter = policies::get_max_root_iterations(); // and max iterations. using boost::math::tools::newton_raphson_iterate; result = newton_raphson_iterate(inverse_gaussian_quantile_functor(dist, p), guess, min, max, get_digits, max_iter); @@ -401,14 +401,14 @@ inline RealType quantile(const inverse_gaussian_distribution& } // quantile template -inline RealType cdf(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions. 
RealType scale = c.dist.scale(); RealType mean = c.dist.mean(); RealType x = c.param; - static const char* function = "boost::math::cdf(const complement(inverse_gaussian_distribution<%1%>&), %1%)"; + constexpr auto function = "boost::math::cdf(const complement(inverse_gaussian_distribution<%1%>&), %1%)"; RealType result = 0; if(false == detail::check_scale(function, scale, &result, Policy())) @@ -437,13 +437,13 @@ inline RealType cdf(const complemented2_type -inline RealType quantile(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions RealType scale = c.dist.scale(); RealType mean = c.dist.mean(); - static const char* function = "boost::math::quantile(const complement(inverse_gaussian_distribution<%1%>&), %1%)"; + constexpr auto function = "boost::math::quantile(const complement(inverse_gaussian_distribution<%1%>&), %1%)"; RealType result = 0; if(false == detail::check_scale(function, scale, &result, Policy())) return result; @@ -464,7 +464,7 @@ inline RealType quantile(const complemented2_type::digits; // Maximum possible binary digits accuracy for type T. // digits used to control how accurate to try to make the result. int get_digits = policies::digits(); - std::uintmax_t max_iter = policies::get_max_root_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_root_iterations(); using boost::math::tools::newton_raphson_iterate; result = newton_raphson_iterate(inverse_gaussian_quantile_complement_functor(c.dist, q), guess, min, max, get_digits, max_iter); if (max_iter >= policies::get_max_root_iterations()) @@ -476,25 +476,25 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const inverse_gaussian_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mean(const inverse_gaussian_distribution& dist) { // aka mu return dist.mean(); } template -inline RealType scale(const inverse_gaussian_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType scale(const inverse_gaussian_distribution& dist) { // aka lambda return dist.scale(); } template -inline RealType shape(const inverse_gaussian_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType shape(const inverse_gaussian_distribution& dist) { // aka phi return dist.shape(); } template -inline RealType standard_deviation(const inverse_gaussian_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType standard_deviation(const inverse_gaussian_distribution& dist) { BOOST_MATH_STD_USING RealType scale = dist.scale(); @@ -504,7 +504,7 @@ inline RealType standard_deviation(const inverse_gaussian_distribution -inline RealType mode(const inverse_gaussian_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mode(const inverse_gaussian_distribution& dist) { BOOST_MATH_STD_USING RealType scale = dist.scale(); @@ -515,7 +515,7 @@ inline RealType mode(const inverse_gaussian_distribution& dist } template -inline RealType skewness(const inverse_gaussian_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType skewness(const inverse_gaussian_distribution& dist) { BOOST_MATH_STD_USING RealType scale = dist.scale(); @@ -525,7 +525,7 @@ inline RealType skewness(const inverse_gaussian_distribution& } template -inline RealType kurtosis(const inverse_gaussian_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const inverse_gaussian_distribution& dist) { RealType scale = dist.scale(); RealType mean = dist.mean(); @@ -534,7 +534,7 @@ inline RealType kurtosis(const 
inverse_gaussian_distribution& } template -inline RealType kurtosis_excess(const inverse_gaussian_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const inverse_gaussian_distribution& dist) { RealType scale = dist.scale(); RealType mean = dist.mean(); diff --git a/include/boost/math/distributions/landau.hpp b/include/boost/math/distributions/landau.hpp new file mode 100644 index 0000000000..129eca2879 --- /dev/null +++ b/include/boost/math/distributions/landau.hpp @@ -0,0 +1,4642 @@ +// Copyright Takuma Yoshimura 2024. +// Copyright Matt Borland 2024 +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_STATS_LANDAU_HPP +#define BOOST_STATS_LANDAU_HPP + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4127) // conditional expression is constant +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef BOOST_MATH_HAS_NVRTC +#include +#include +#include +#include +#endif + +namespace boost { namespace math { +template +class landau_distribution; + +namespace detail { + +template +BOOST_MATH_GPU_ENABLED inline RealType landau_pdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant&) +{ + BOOST_MATH_STD_USING + RealType result; + + if (x < 1) { + // Rational Approximation + // Maximum Relative Error: 6.1179e-18 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(2.62240126375351657026e-1), + static_cast(3.37943593381366824691e-1), + static_cast(1.53537606095123787618e-1), + static_cast(3.01423783265555668011e-2), + static_cast(2.66982581491576132363e-3), + static_cast(-1.57344124519315009970e-5), + static_cast(3.46237168332264544791e-7), + static_cast(2.54512306953704347532e-8), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.61596691542333069131e0), + static_cast(1.31560197919990191004e0), + static_cast(6.37865139714920275881e-1), + static_cast(1.99051021258743986875e-1), + static_cast(3.73788085017437528274e-2), + static_cast(3.72580876403774116752e-3), + }; + + result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x); + } + else if(x < 2){ + RealType t = x - 1; + + // Rational Approximation + // Maximum Relative Error: 2.1560e-17 + BOOST_MATH_STATIC const RealType P[6] = { + static_cast(1.63531240868022603476e-1), + static_cast(1.42818648212508067982e-1), + static_cast(4.95816076364679661943e-2), + static_cast(8.59234710489723831273e-3), + static_cast(5.76649181954629544285e-4), + static_cast(-5.66279925274108366994e-7), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.41478104966077351483e0), + static_cast(9.41180365857002724714e-1), + static_cast(3.65084346985789448244e-1), + static_cast(8.77396986274371571301e-2), + static_cast(1.24233749817860139205e-2), + static_cast(8.57476298543168142524e-4), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 4) { + RealType t = x - 2; + + // Rational Approximation + // Maximum Relative Error: 9.1732e-19 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(9.55242261334771588094e-2), + static_cast(6.66529732353979943139e-2), + static_cast(1.80958840194356287100e-2), + static_cast(2.34205449064047793618e-3), + static_cast(1.16859089123286557482e-4), + static_cast(-1.48761065213531458940e-7), + 
static_cast(4.37245276130361710865e-9), + static_cast(-8.10479404400603805292e-11), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.21670723402658089612e0), + static_cast(6.58224466688607822769e-1), + static_cast(2.00828142796698077403e-1), + static_cast(3.64962053761472303153e-2), + static_cast(3.76034152661165826061e-3), + static_cast(1.74723754509505656326e-4), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 8) { + RealType t = x - 4; + + // Rational Approximation + // Maximum Relative Error: 7.6621e-18 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(3.83643820409470770350e-2), + static_cast(1.97555000044256883088e-2), + static_cast(3.71748668368617282698e-3), + static_cast(3.04022677703754827113e-4), + static_cast(8.76328889784070114569e-6), + static_cast(-3.34900379044743745961e-9), + static_cast(5.36581791174380716937e-11), + static_cast(-5.50656207669255770963e-13), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(9.09290785092251223006e-1), + static_cast(3.49404120360701349529e-1), + static_cast(7.23730835206014275634e-2), + static_cast(8.47875744543245845354e-3), + static_cast(5.28021165718081084884e-4), + static_cast(1.33941126695887244822e-5), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 16) { + RealType t = x - 8; + + // Rational Approximation + // Maximum Relative Error: 6.6311e-19 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(1.12656323880287532947e-2), + static_cast(2.87311140580416132088e-3), + static_cast(2.61788674390925516376e-4), + static_cast(9.74096895307400300508e-6), + static_cast(1.19317564431052244154e-7), + static_cast(-6.99543778035110375565e-12), + static_cast(4.33383971045699197233e-14), + static_cast(-1.75185581239955717728e-16), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(4.94430267268436822392e-1), + static_cast(1.00370783567964448346e-1), + static_cast(1.05989564733662652696e-2), + static_cast(6.04942184472254239897e-4), + static_cast(1.72741008294864428917e-5), + static_cast(1.85398104367945191152e-7), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 32) { + RealType t = x - 16; + + // Rational Approximation + // Maximum Relative Error: 5.6459e-17 + BOOST_MATH_STATIC const RealType P[7] = { + static_cast(2.83847488747490686627e-3), + static_cast(4.95641151588714788287e-4), + static_cast(2.79159792287747766415e-5), + static_cast(5.93951761884139733619e-7), + static_cast(3.89602689555407749477e-9), + static_cast(-4.86595415551823027835e-14), + static_cast(9.68524606019510324447e-17), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(3.01847536766892219351e-1), + static_cast(3.63152433272831196527e-2), + static_cast(2.20938897517130866817e-3), + static_cast(7.05424834024833384294e-5), + static_cast(1.09010608366510938768e-6), + static_cast(6.08711307451776092405e-9), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 64) { + RealType t = x - 32; + + // Rational Approximation + // Maximum Relative Error: 6.5205e-17 + BOOST_MATH_STATIC const RealType P[6] = { + static_cast(6.85767880395157523315e-4), + static_cast(4.08288098461672797376e-5), + static_cast(8.10640732723079320426e-7), + static_cast(6.10891161505083972565e-9), + static_cast(1.37951861368789813737e-11), + 
static_cast(-1.25906441382637535543e-17), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.23722380864018634550e-1), + static_cast(6.05800403141772433527e-3), + static_cast(1.47809654123655473551e-4), + static_cast(1.84909364620926802201e-6), + static_cast(1.08158235309005492372e-8), + static_cast(2.16335841791921214702e-11), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (ilogb(x) < 8) { + RealType t = log2(ldexp(x, -6)); + + // Rational Approximation + // Maximum Relative Error: 3.5572e-17 + BOOST_MATH_STATIC const RealType P[7] = { + static_cast(6.78613480244945294595e-1), + static_cast(9.61675759893298556080e-1), + static_cast(3.45159462006746978086e-1), + static_cast(6.32803373041761027814e-2), + static_cast(6.93646175256407852991e-3), + static_cast(4.69867700169714338273e-4), + static_cast(1.76219117171149694118e-5), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.44693640094228656726e0), + static_cast(5.46298626321591162873e-1), + static_cast(1.01572892952421447864e-1), + static_cast(1.04982575345680980744e-2), + static_cast(7.65591730392359463367e-4), + static_cast(2.69383817793665674679e-5), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x * x); + } + else if (ilogb(x) < 16) { + RealType t = log2(ldexp(x, -8)); + + // Rational Approximation + // Maximum Relative Error: 5.7408e-17 + BOOST_MATH_STATIC const RealType P[9] = { + static_cast(6.51438485661317103070e-1), + static_cast(2.67941671074735988081e-1), + static_cast(5.18564629295719783781e-2), + static_cast(6.18976337233135940231e-3), + static_cast(5.08042228681335953236e-4), + static_cast(2.97268230746003939324e-5), + static_cast(1.24283200336057908183e-6), + static_cast(3.35670921544537716055e-8), + static_cast(5.06987792821954864905e-10), + }; + BOOST_MATH_STATIC const RealType Q[9] = { + static_cast(1.), + static_cast(4.23792506680780833665e-1), + static_cast(8.17040643791396371682e-2), + static_cast(9.63961713981621216197e-3), + static_cast(8.06584713485725204135e-4), + static_cast(4.62050471704120102023e-5), + static_cast(1.96919734048024406173e-6), + static_cast(5.23890369587103685278e-8), + static_cast(7.99399970089366802728e-10), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x * x); + } + else if (ilogb(x) < 32) { + RealType t = log2(ldexp(x, -16)); + + // Rational Approximation + // Maximum Relative Error: 1.0195e-17 + BOOST_MATH_STATIC const RealType P[10] = { + static_cast(6.36745544906925230102e-1), + static_cast(2.06319686601209029700e-1), + static_cast(3.27498059700133287053e-2), + static_cast(3.30913729536910108000e-3), + static_cast(2.34809665750270531592e-4), + static_cast(1.21234086846551635407e-5), + static_cast(4.55253563898240922019e-7), + static_cast(1.17544434819877511707e-8), + static_cast(1.76754192209232807941e-10), + static_cast(-2.78616504641875874275e-17), + }; + BOOST_MATH_STATIC const RealType Q[9] = { + static_cast(1.), + static_cast(3.24145654925686670201e-1), + static_cast(5.14350019501887110402e-2), + static_cast(5.19867984016649969928e-3), + static_cast(3.68798608372265018587e-4), + static_cast(1.90449594112666257344e-5), + static_cast(7.15068261954120746192e-7), + static_cast(1.84646096630493837656e-8), + static_cast(2.77636277083994601941e-10), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x * x); + } + else if (ilogb(x) < 64) { + RealType t 
= log2(ldexp(x, -32)); + + // Rational Approximation + // Maximum Relative Error: 8.0433e-17 + BOOST_MATH_STATIC const RealType P[9] = { + static_cast(6.36619776379492082324e-1), + static_cast(2.68158440168597706495e-1), + static_cast(5.49040993767853738389e-2), + static_cast(7.23458585096723552751e-3), + static_cast(6.85438876301780090281e-4), + static_cast(4.84561891424380633578e-5), + static_cast(2.82092117716081590941e-6), + static_cast(9.57557353473514565245e-8), + static_cast(5.16773829224576217348e-9), + }; + BOOST_MATH_STATIC const RealType Q[9] = { + static_cast(1.), + static_cast(4.21222294324039934056e-1), + static_cast(8.62431574655015481812e-2), + static_cast(1.13640608906815986975e-2), + static_cast(1.07668486873466248474e-3), + static_cast(7.61148039258802068270e-5), + static_cast(4.43109262308946031382e-6), + static_cast(1.50412757354817481381e-7), + static_cast(8.11746432728995551732e-9), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x * x); + } + else{ + result = 2 / (constants::pi() * x * x); + } + + return result; +} + + +template +BOOST_MATH_GPU_ENABLED inline RealType landau_pdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant&) +{ + BOOST_MATH_STD_USING + RealType result; + + if (x < 1) { + // Rational Approximation + // Maximum Relative Error: 7.4629e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.62240126375351657025589608183516471315e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.94698530837122818345222883832757839888e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06728003509081587907620543204047536319e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.41256254272104786752190871391781331271e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.34420233794664437979710204055323742199e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.55021337841765667713712845735938627884e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.90557752737535583908921594594761570259e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.89899202021818926241643215600800085123e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.19635143827754893815649685600837995626e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.90989458941330917626663002392683325107e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92038069341802550019371049232152823407e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.40251964644989324856906264776204142653e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.55873076454666680466531097660277995317e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.80771940886011613393622410616035955976e-13), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.35771004134750535117224809381897395331e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.37002484862962406489509174332580745411e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.40833952846707180337506160933176158766e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.81709029902887471895588386777029652661e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.98824705588020901032379932614151640505e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.83767868823957223030472664574235892682e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35718995485026064249286377096427165287e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.37305148463792922843850823142976586205e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.06575764439154972544253668821920460826e-3), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07663693811543002088092708395572161856e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.09711221791106684926377106608027279057e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.91302186546138009232520527964387543006e-6), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x); + } + else if (x < 2) { + RealType t = x - 1; + + // Rational Approximation + // Maximum Relative Error: 6.6684e-38 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63531240868022603475813051802104652763e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.17803013130262393286657457221415701909e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.77780575692956605214628767143941600132e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44224824965135546671876867759691622832e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.93294212655117265065191070995706405837e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16021988737209938284910541133167243163e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.89245591723934954825306673917695058577e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.09614731993308746343064543583426077485e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48578173962833046113032690615443901556e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.91098199913613774034789276073191721350e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.46788618410999858374206722394998550706e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.14296339768511312584670061679121003569e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.52631422678659858574974085885146420544e-15), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.48481735580594347909096198787726314434e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.91598585888012869317473155570063821216e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.12672162924784178863164220170459406872e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06981909640884405591730537337036849744e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.89767326897694369071250285702215471082e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05098647402530640576816174680275844283e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.10454903166951593161839822697382452489e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.08850649343579977859251275585834901546e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.21168773136767495960695426112972188729e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21420361560900449851206650427538430926e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.84456961344035545134425261150891935402e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46462389440125559723382692664970874255e-8), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 4) { + RealType t = x - 2; + + // Rational Approximation + // Maximum Relative Error: 6.3397e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[12] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.55242261334771588093967856464157010584e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48866040463435403672044647455806606078e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04241715667984551487882549843428953917e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.32030608366022483736940428739436921577e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17209924605508887793687609139940354371e-2), + 
            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.16808856405217460367038406337257561698e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75466331296758720822164534334356742122e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35657250222166360635152712608912585973e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28870137478821561164537700376942753108e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.07556331078347991810236646922418944687e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.18067019247793233704208913546277631267e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.96745094401496364651919224112160111958e-12),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.07735872062601280828576861757316683396e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.00667909426245388114411629440735066799e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18840123665979969294228925712434860653e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.79233661359264185181083948452464063323e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.38221013998193410441723488211346327478e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.91365002115280149925615665651486504495e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.50379182630668701710656913597366961277e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.03946139315999749917224356955071595508e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.95417998434227083224840824790387887539e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.05109028829536837163462811783445124876e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.33125282515685091345480270760501403655e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.58127838888839012133236453180928291822e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.64781659622256824499981528095809140284e-12),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 8) {
+        RealType t = x - 4;
+
+        // Rational Approximation
+        // Maximum Relative Error: 8.0238e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.83643820409470770350079809236512802618e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.02996762669868036727057860510914079553e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88220267784864518806154823373656292346e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.12677705163934102871251710968247891123e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.96642570169484318623869835991454809217e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04358807405587072010621764865118316919e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.09461879230275452416933096674703383719e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.06823998699058163165831211561331795518e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24129479811279469256914665585439417704e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.01799222004929573125167949870797564244e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27744716755834439008073010185921331093e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.64210356143729930758657624381557123115e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11666384975358223644665199669986358056e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.30202644697506464624965700043476935471e-22),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44479208003384373099160875893986831861e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.54290037675901616362332580709754113529e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.79815821498858750185823401350096868195e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01076480676864621093034009679744852375e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88607467767854661547920709472888000469e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.51572461182263866462295745828009170865e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.39843444671402317250813055670653845815e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60546478324160472036295355872288494327e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.25462551353792877506974677628167909695e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.05915328498722701961972258866550409117e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.20632869761578411246344533841556350518e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99438347491752820345051091574883391217e-12),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 16) {
+        RealType t = x - 8;
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.2541e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12656323880287532946687856443190592955e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.31374972240605659239154788518240221417e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.10776910971729651587578902049263096117e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.53872632372452909103332647334935138324e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.81756147611150151751911596225474463602e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75302607308223110644722612796766590029e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33839913867469199941739467004997833889e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.32115127487193219555283158969582307620e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.90766547421015851413713511917307214275e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08939895797457378361211153362169024503e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.88597187949354708113046662952288249250e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62829447082637808482463811005771133942e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.65525705592205245661726488519562256000e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.60698835222044786453848932477732972928e-26),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.88605948104664828377228254521124685930e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.58594705700945215121673591119784576258e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.67113091918430152113322758216774649130e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39583889554372147091140765508385042797e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57139043074134496391251233307552940106e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26451960029396455805403758307828624817e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.30400557427446929311350088728080667203e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.99617890540456503276038942480115937467e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.50232186816498003232143065883536003942e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59310652872918546431499274822722004981e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.82203579442241682923277858553949327687e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10345359368438386945407402887625511801e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.55225829972215033873365516486524181445e-17),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 32) {
+        RealType t = x - 16;
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.1276e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.83847488747490686627461184914507143000e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.61220392257287638364190361688188696363e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.42217711448675893329072184826328300776e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20597728166467972373586650878478687059e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.46404433551447410467051774706080733051e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.27909145305324391651548849043874549520e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.33564789388635859003082815215888382619e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.18456219811686603951886248687349029515e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.92730718471866912036453008101994816885e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.51773776414973336511129801645901922234e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32371094281803507447435352076735970857e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44775294242071078601023962869394690897e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.94920633206242554892676642458535141153e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.18030442958390399095902441284074544279e-31),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.65871972115253665568580046072625013145e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.74531522538358367003224536101724206626e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20716479628426451344205712137554469781e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.83247584368619500260722365812456197226e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.52931189426842216323461406426803698335e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19343566926626449933230814579037896037e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.16243058880148231471744235009435586353e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21344088555713979086041331387697053780e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63246599173435592817113618949498524238e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43426263963680589288791782556801934305e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.62386317351298917459659548443220451300e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13281535580097407374477446521496074453e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.27187882784316306216858933778750811182e-21),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 64) {
+        RealType t = x - 32;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.8458e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.85767880395157523314894776472286059373e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07684379950498990874449661385130414967e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.29181715091139597455177955800910928786e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78745116935613858188145093313446961899e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.61522707085521545633529621526418843836e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00989556810424018339768632204186394735e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.94136605359672888838088037894401904574e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.15203266224687619299892471650072720579e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.25349098945982074415471295859193558426e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.31874620165906020409111024866737082384e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.19330888204484008667352280840160186671e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.89951131249530265518610784629981482444e-30),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.35979606245171162602352579985003194602e-33),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.50946115943875327149319867495704969908e-36),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21212467547297045538111676107434471585e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.17663841151156626845609176694801024524e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.25478800461954401173897968683982253458e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.69831763649657690166671862562231448718e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19712058726935472913461138967922524612e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11423395018514913507624349385447326009e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.58664605420655866109404476637021322838e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.15398721299264752103644541934654351463e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17567858878427250079920401604119982576e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.92808825029184923713064129493385469531e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.06007644624654848502783947087038305433e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.01246784499782934986619755015082182398e-23),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(x) < 8) {
+        RealType t = log2(ldexp(x, -6));
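+
+        // The octave branches that follow change variables: ldexp(x, -6)
+        // rescales [64, 256) onto [1, 4), so t = log2(x) - 6 lies in [0, 2),
+        // and the known x^-2 decay of the tail is factored out of each fit
+        // (note the evaluate_polynomial(Q, t) * x * x denominators below).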
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.6634e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.78613480244945294594505480426643613242e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.07362312709864018864207848733814857157e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.47727521897653923649758175033206259109e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04183129813120998456717217121703605830e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.09978729224187570508825456585418357590e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.98739784100617344335742510102186570437e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.08596635852958074572320481325030046975e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.34947456497875218771996878497766058580e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31766866003171430205401377671093088134e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.29444683984117745298484117924452498776e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34885173277203843795065094551227568738e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30306828175920576070486704404727265760e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.05908347665846652276910544097430115068e-13),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07218191317166728296013167220324207427e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.38908532499742180532814291654329829544e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63676664387672566455490461784630320677e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31302647779056928216789214742790688980e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.69477260342662648574925942030720482689e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.82918424748192763052497731722563414651e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.69244295675395948278971027618145225216e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.08928780307959133484802547123672997757e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.11055350627948183551681634293425028439e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22066081452382450191191677443527136733e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78025987104169227624653323808131280009e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.93164997733174955208299290433803918816e-13),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x * x);
+    }
+    else if (ilogb(x) < 16) {
+        RealType t = log2(ldexp(x, -8));
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.1919e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[19] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.51438485661317103069553924870169052838e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.29652867028564588922931020456447362877e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.90557738902930002845457640269863338815e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.47622170600415955276436226439948455362e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.75198213226024095368607442455597948634e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.73010116224706573149404022585502812698e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.33440551266376466187512220300943206212e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.27556365758364667507686872656121131255e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.63395763346533783414747536236033733143e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.75408632486279069728789506666930014630e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.74194099205847568739445023334735086627e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.52462367172968216583968200390021647482e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.75367408334713835736514158797013854282e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.62633983586253025227038002631010874719e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46717630077826649018810277799043037738e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.00642537643332236333695338824014611799e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.47351714774371338348451112020520067028e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.15896012319823666881998903857141624070e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.62176014448801854863922778456328119208e-25),
+        };
+        BOOST_MATH_STATIC const RealType Q[18] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.79042471052521112984740498925369905803e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.55058068535501327896327971200536085268e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33143443551335870264469963604049242325e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75325348141376361676246108294525717629e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.28871858542582365161221803267369985933e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.23702867786056336210872367019916245663e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.14513776996445072162386201808986222616e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.13763070277828149031445006534179375988e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75529866599039195417128499359378019030e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.53029524184341515115464886126119582515e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.10598685541492162454676538516969294049e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75587930183994618721688808612207567233e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.83150895141383746641924725237948860959e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30675015193353451939138512698571954110e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.71774361582156518394662911172142577047e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.03397072601182597002547703682673198965e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.52666999314026491934445577764441483687e-20),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x * x);
+    }
+    else if (ilogb(x) < 32) {
+        RealType t = log2(ldexp(x, -16));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.2411e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[18] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36745544906925230101752563433306496000e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.73688900814770369626527563956988302379e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.81718746296195151971617726268038570065e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.13663059680440438907042970413471861121e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.40004645275531255402942177790836798523e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.80059489775751412372432345156902685277e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.47699576477278882708291693658669435536e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.45226121992756638990044029871581321461e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13406331882918393195342615955627442395e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.46682598893946975917562485374893408094e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50450743907497671918301557074470352707e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.33121239192492785826422815650499088833e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05998176182038788839361491871608950696e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.17857918044922309623941523489531919822e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.67865547879145051715131144371287619666e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.89654931108624296326740455618289840327e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.73017950634516660552375272495618707905e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.68519137981001059472024985205381913202e-24),
+        };
+        BOOST_MATH_STATIC const RealType Q[18] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.29948066505039082395951244410552705780e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.13730690908098361287472898564563217987e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27810872138103132689695155123062073221e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31948058845675193039732511839435290811e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06822729610151747708260147063757668707e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.03250522904270408071762059653475885811e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.85197262150009124871794386644476067020e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78139536405831228129042087771755615472e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.01642938314578533660138738069251610818e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36328724659833107203404258336776286146e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.80342430290059616305921915291683180697e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66502051110007556897014898713746069491e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.42209715361911856322028597714105225748e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.77842582605458905635718323117222788078e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.69147628396460384758492682185049535079e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.43015110519230289924122344324563890953e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.21788819753161690674882271896091269356e-24),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x * x);
+    }
+    else if (ilogb(x) < 64) {
+        RealType t = log2(ldexp(x, -32));
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.0348e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[19] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36619776379492082323649724050601750141e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29818158612993476124594583743266388964e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.07736315744724186061845512973085067283e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72566458808745644851080213349673559756e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.01243670706840752914099834172565920736e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65306557791300593593488790517297048902e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41751291649776832705247036453540452119e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.26652535267657618112731521308564571490e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.32742926765578976373764178875983383214e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.32948532312961882464151446137719196209e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96536595631611560703804402181953334762e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.48463581600017734001916804890205661347e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.65588239861378749665334852913775575615e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39462290798829172203386678450961569536e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83049279786679854738508318703604392055e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.87131679136229094080572090496960701828e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35977519905679446758726709186381481753e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.50639358104925465711435411537609380290e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.47593981758247424082096107205150226114e-40),
+        };
+        BOOST_MATH_STATIC const RealType Q[18] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.60997521267746350015610841742718472657e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.40470704349086277215167519790809981379e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.42305660178694704379572259575557934523e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.30272082429322808188807034927827414359e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.16742567294582284534194935923915261582e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22662407906450293978092195442686428843e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.84343501655116670387608730076359018869e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.65591734166216912475609790035240582537e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15131286290573570519912674341226377625e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.08718962387679715644203327604824250850e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.04444946843492647477476784817227903589e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35966282749098189010715902284098451987e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19066854132814661112207991393498039851e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.87533136296192957063599695937632598999e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.93945754223094281767677343057286164777e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13592988790740273103099465658198617078e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.64942281110142621080966631872844557766e-26),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x * x);
+    }
+    else if (ilogb(x) < 128) {
+        RealType t = log2(ldexp(x, -64));
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.3963e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[18] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36619772367581344984274685280416528592e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72417390936686577479751162141499390532e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.74319117326966091295365258834959120634e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.94269681742277805376258823511210253023e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.09354876913180019634171748490068797632e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.46986612543101357465265079580805403382e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.21726753043764920243710352514279216684e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29971756326232375757519588897328507962e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06770117983967828996891025614645348127e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.27141668055392041978388268556174062945e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48383887723476619460217715361289178429e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.49530301203157403427315504054500005836e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18668427867427341566476567665953082312e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.73377083349017331494144334612902128610e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.32380647653444581710582396517056104063e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.29865827039123699411352876626634361936e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07464506614287925844993490382319608619e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60173555862875972119871402681133785088e-23),
+        };
+        BOOST_MATH_STATIC const RealType Q[18] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.27912237038396638341492536677313983747e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.02138359905285600768927677649467546192e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24763589856532154099789305018886222841e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27133166772875885088000073325642460162e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01628419446817660009223289575239926907e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.62446834592284424116329218260348474201e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.61238790103816844895453935630752859272e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.67714109140674398508739253084218270557e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.70952563202454851902810005226033501692e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.33080865791583428494353408816388908148e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.06120545912923145572220606396715398781e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.86403930600680015325844027465766431761e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.29419718354538719350803683985104818654e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.36261565790718847159482447247645891176e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46062982552515416754702177333530968405e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.68804852250549346018535616711418533423e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.51600033199082754845231795160728350588e-23),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x * x);
+    }
+    else {
+        result = 2 / (constants::pi<RealType>() * x * x);
+    }
+
+    return result;
+}
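+
+// A note on the scheme above: each finite interval [a, b) uses a rational
+// approximation P(t) / Q(t) in the shifted variable t = x - a, the octave
+// ranges use t = log2(x / 2^k) with the x^-2 tail factored out, and beyond
+// the last fitted range only the leading asymptotic term remains, i.e.
+// pdf(x) -> 2 / (pi * x^2) as x -> +infinity.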
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_pdf_minus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x >= -1) {
+        RealType t = x + 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.3928e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(2.21762208692280384264e-1),
+            static_cast<RealType>(7.10041055270973473923e-1),
+            static_cast<RealType>(8.66556480457430718380e-1),
+            static_cast<RealType>(4.78718713740071686348e-1),
+            static_cast<RealType>(1.03670563650247405820e-1),
+            static_cast<RealType>(4.31699263023057628473e-3),
+            static_cast<RealType>(1.72029926636215817416e-3),
+            static_cast<RealType>(-2.76271972015177236271e-4),
+            static_cast<RealType>(1.89483904652983701680e-5),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(2.18155995697310361937e0),
+            static_cast<RealType>(2.53173077603836285217e0),
+            static_cast<RealType>(1.91802065831309251416e0),
+            static_cast<RealType>(9.94481663032480077373e-1),
+            static_cast<RealType>(3.72037148486473195054e-1),
+            static_cast<RealType>(8.85828240211801048938e-2),
+            static_cast<RealType>(1.41354784778520560313e-2),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x >= -2) {
+        RealType t = x + 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.4742e-18
+        BOOST_MATH_STATIC const RealType P[11] = {
+            static_cast<RealType>(6.50763682207511020789e-3),
+            static_cast<RealType>(5.73790055136022120436e-2),
+            static_cast<RealType>(2.22375662069496257066e-1),
+            static_cast<RealType>(4.92288611166073916396e-1),
+            static_cast<RealType>(6.74552077334695078716e-1),
+            static_cast<RealType>(5.75057550963763663751e-1),
+            static_cast<RealType>(2.85690710485234671432e-1),
+            static_cast<RealType>(6.73776735655426117231e-2),
+            static_cast<RealType>(3.80321995712675339999e-3),
+            static_cast<RealType>(1.09503400950148681072e-3),
+            static_cast<RealType>(-9.00045301380982997382e-5),
+        };
+        BOOST_MATH_STATIC const RealType Q[11] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(1.07919389927659014373e0),
+            static_cast<RealType>(2.56142472873207168042e0),
+            static_cast<RealType>(1.68357271228504881003e0),
+            static_cast<RealType>(2.23924151033591770613e0),
+            static_cast<RealType>(9.05629695159584880257e-1),
+            static_cast<RealType>(8.94372028246671579022e-1),
+            static_cast<RealType>(1.98616842716090037437e-1),
+            static_cast<RealType>(1.70142519339469434183e-1),
+            static_cast<RealType>(1.46288923980509020713e-2),
+            static_cast<RealType>(1.26171654901120724762e-2),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        const static RealType lambda_bias = static_cast<RealType>(1.45158270528945486473); // (= log(pi/2)+1)
+
+        RealType sigma = exp(-x * constants::pi<RealType>() / 2 - lambda_bias);
+        RealType s = exp(-sigma) * sqrt(sigma);
+
+        if (x >= -4) {
+            RealType t = -x - 2;
+
+            // Rational Approximation
+            // Maximum Relative Error: 5.8685e-18
+            BOOST_MATH_STATIC const RealType P[8] = {
+                static_cast<RealType>(6.31126317567898819465e-1),
+                static_cast<RealType>(5.28493759149515726917e-1),
+                static_cast<RealType>(3.28301410420682938866e-1),
+                static_cast<RealType>(1.31682639578153092699e-1),
+                static_cast<RealType>(3.86573798047656547423e-2),
+                static_cast<RealType>(7.77797337463414935830e-3),
+                static_cast<RealType>(9.97883658430364658707e-4),
+                static_cast<RealType>(6.05131104440018116255e-5),
+            };
+            BOOST_MATH_STATIC const RealType Q[8] = {
+                static_cast<RealType>(1),
+                static_cast<RealType>(8.47781139548258655981e-1),
+                static_cast<RealType>(5.21797290075642096762e-1),
+                static_cast<RealType>(2.10939174293308469446e-1),
+                static_cast<RealType>(6.14856955543769263502e-2),
+                static_cast<RealType>(1.24427885618560158811e-2),
+                static_cast<RealType>(1.58973907730896566627e-3),
+                static_cast<RealType>(9.66647686344466292608e-5),
+            };
+
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else if (x >= -5.1328125) {
+            RealType t = -x - 4;
+
+            // Rational Approximation
+            // Maximum Relative Error: 3.2532e-17
+            BOOST_MATH_STATIC const RealType P[9] = {
+                static_cast<RealType>(6.26864481454444278646e-1),
+                static_cast<RealType>(5.10647753508714204745e-1),
+                static_cast<RealType>(1.98551443303285119497e-1),
+                static_cast<RealType>(4.71644854289800143386e-2),
+                static_cast<RealType>(7.71285919105951697285e-3),
+                static_cast<RealType>(8.93551020612017939395e-4),
+                static_cast<RealType>(6.97020145401946303751e-5),
+                static_cast<RealType>(4.17249760274638104772e-6),
+                static_cast<RealType>(7.73502439313710606153e-12),
+            };
+            BOOST_MATH_STATIC const RealType Q[8] = {
+                static_cast<RealType>(1),
+                static_cast<RealType>(8.15124079722976906223e-1),
+                static_cast<RealType>(3.16755852188961901369e-1),
+                static_cast<RealType>(7.52819418000330690962e-2),
+                static_cast<RealType>(1.23053506566779662890e-2),
+                static_cast<RealType>(1.42615273721494498141e-3),
+                static_cast<RealType>(1.11211928184477279204e-4),
+                static_cast<RealType>(6.65899898061789485757e-6),
+            };
+
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else {
+            result = 0;
+        }
+    }
+
+    return result;
+}
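+
+// Left-tail note: with lambda_bias = log(pi/2) + 1, both precision variants
+// write the density through sigma = exp(-pi * x / 2 - lambda_bias) and the
+// prefactor s = exp(-sigma) * sqrt(sigma), which decays double-exponentially
+// as x -> -infinity; past the last fitted interval (x < -5.1328125 at double
+// precision, x < -6.875 at quad precision) the result is flushed to zero.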
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_pdf_minus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x >= -1) {
+        RealType t = x + 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.2803e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[16] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21762208692280384264052188465103527015e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07121154108880017947709737976750200391e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.34036993772851526455115746887751392080e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.06347688547967680654012636399459376006e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.68662427153576049083876306225433068713e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.67496398036468361727297056409545434117e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.69289909624425652939466055042210850769e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65649060232973461318206716040181929160e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.93006819232611588097575675157841312689e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34514211575975820725706925256381036061e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.86184594939834946952489805173559003431e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66982890863184520310462776294335540260e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.28944885271022303878175622411438230193e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.47136245900831864668353768185407977846e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.98034330388999615249606466662289782222e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.67931741921878993598048665757824165533e-12),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.81019852414657529520034272090632311645e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.51602582973416348091361820936922274106e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.87246706500788771729605610442552651673e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.55758863380051182011815572544985924963e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.16921634066377885762356020006515057786e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.28590978860106110644638308039189352463e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.07182688002603587927920766666962846169e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.14413931232875917473403467095618397172e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.59534588679183116305361784906322155131e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.62788361787003488572546802835677555151e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32291670834750583053201239125839728061e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.97300476673137879475887158731166178829e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99801949382703479169010768105376163814e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09234481837537672361990844588166022791e-5),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x >= -2) {
+        RealType t = x + 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.8590e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[19] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.50763682207511020788551990942118742910e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.35160148798611192350830963080055471564e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.85567614778755464918744664468938413626e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24395902843792338723377508551415399267e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75803588325237557939443967923337822799e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.44751743702858358960016891543930028989e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.38771920793989989423514808134997891434e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.99899457801652012757624005300136548027e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.59668432891116320233415536189782241116e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.02521376213276025040458141317737977692e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.00511857068867825025582508627038721402e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.19031970665203475373248353773765801546e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.03203906044415590651592066934331209362e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.01354553335348149914596284286907046333e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.40077709279222086527834844446288408059e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.07036291955272673946830858788691198641e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75229595324028909877518859428663744660e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.51522041748753421579496885726802106514e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.28554063325397021905295499768922434904e-10),
+        };
+        BOOST_MATH_STATIC const RealType Q[19] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.55889733194498836168215560931863059152e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.45050534010127542130960211621894286688e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39437268390909980446225806216001154876e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85370557677145869100298813360909127310e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99358236671478050470186012149124879556e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.82914467302553175692644992910876515874e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.42426383410763382224410804289834740252e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.69477085497572590673874940261777949808e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.69832833104494997844651343499526754631e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.95708391432781281454592429473451742972e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32541987059874996779040445020449508142e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.24889827757289516008834701298899804535e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76326709965329347689033555841964826234e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.19652942193884551681987290472603208296e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22987197033955835618810845653379470109e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.51893290463268547258382709202599507274e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.43575882043846146581825453522967678538e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.06683418138599962787868832158681391673e-5),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        const static RealType lambda_bias = BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.451582705289454864726195229894882143572); // (= log(pi/2)+1)
+
+        RealType sigma = exp(-x * constants::pi<RealType>() / 2 - lambda_bias);
+        RealType s = exp(-sigma) * sqrt(sigma);
+
+        if (x >= -4) {
+            RealType t = -x - 2;
+
+            // Rational Approximation
+            // Maximum Relative Error: 7.0019e-35
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[18] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.31126317567898819464557840628449107915e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.31008645911415314700225107327351636697e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.60743397071713227215207831174512626190e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.69243936604887410595461520921270733657e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.93778117053417749769040328795824088196e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.04718815412035890861219665332918840537e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.41914050146414549019258775115663029791e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17147074474397510167661838243237386450e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31006358624990533313832878493963971249e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.31424805670861981190416637260176493218e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71604447221961082506919140038819715820e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.01796816886825676412069047911936154422e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.16975381608692872525287947181531051179e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.47194712963929503930146780326366215579e-7),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.19469248860267489980690249379132289464e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22272545853285700254948346226514762534e-9),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.05432616288832680241611577865488417904e-13),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08723511461992818779941378551362882730e-14),
+            };
+            BOOST_MATH_STATIC const RealType Q[16] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01021278581037282130358759075689669228e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.91783545335316986601746168681457332835e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.90025337163174587593060864843160047245e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.09029833197792884728968597136867674585e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44295160726145715084515736090313329125e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.46416375246465800703437031839310870287e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.86521610039165178072099210670199368231e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.29830357713744587265637686549132688965e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32187562202835921333177458294507064946e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75034541113922116856456794810138543224e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.79216314818261657918748858010817570215e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.69179323869133503169292092727333289999e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.09019623876540244217038375274802731869e-7),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.13900582194674129200395213522524183495e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92590979457175565666605415984496551246e-9),
+            };
+            // LCOV_EXCL_STOP
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else if (x >= -6.875) {
+            RealType t = -x - 4;
+
+            // Rational Approximation
+            // Maximum Relative Error: 6.4095e-35
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[18] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.26864481454444278645937156746132802908e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35052316263030534355724898036735352905e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46701697626917441774916114124028252971e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.03805679118924248671851611170709699862e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.29457230118834515743802694404620370943e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.04992992250026414994541561073467805333e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.21521951889983113700615967351903983850e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50611640491200231504944279876023072268e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.96007721851412367657495076592244098807e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.76876967456744990483799856564174838073e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.34285198828980523126745002596084187049e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.98811180672843179022928339476420108494e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36933707823930146448761204037985193905e-7),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.76515121042989743198432939393805252169e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.87259915481622487665138935922067520210e-9),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34703958446785695676542385299325713141e-10),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.53199672688507288037695102377982544434e-12),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.97283413733676690377949556457649405210e-14),
+            };
+            BOOST_MATH_STATIC const RealType Q[18] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.15492787140203223641846510939273526038e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.34095796298757853634036909432345998054e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.65650140652391522296109869665871008634e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.44894089102275258806976831589022821974e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.27121975866547045393504246592187721233e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.91803733484503004520983723890062644122e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.40341451263971324381655967408519161854e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72360046810103129487529493828280649599e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.60986435254173073868329335245110986549e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01216966786091058959421242465309838187e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11514619470960373138100691463949937779e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01639426441970732201346798259534312372e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.60411422906070056043690129326288757143e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.58398956202137709744885774931524547894e-9),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.14956902064425256856583295469934064903e-10),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.23201118234279642321630988607491208515e-12),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43185798646451225275728735761433082676e-13),
+            };
+            // LCOV_EXCL_STOP
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else {
+            result = 0;
+        }
+    }
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_pdf_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53> &tag) {
+    if (x >= 0) {
+        return landau_pdf_plus_imp_prec<RealType>(x, tag);
+    }
+    else if (x <= 0) {
+        return landau_pdf_minus_imp_prec<RealType>(x, tag);
+    }
+    else {
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_pdf_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>& tag) {
+    if (x >= 0) {
+        return landau_pdf_plus_imp_prec<RealType>(x, tag);
+    }
+    else if (x <= 0) {
+        return landau_pdf_minus_imp_prec<RealType>(x, tag);
+    }
+    else {
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
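+
+// In the two dispatchers above, an argument of exactly zero satisfies x >= 0
+// and is routed to the plus-side implementation; only a NaN input fails both
+// ordered comparisons and falls through to the quiet_NaN return.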
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType landau_pdf_imp(const landau_distribution<RealType, Policy>& dist, const RealType& x) {
+    //
+    // This calculates the pdf of the Landau distribution.
+    //
+
+    BOOST_MATH_STD_USING // for ADL of std functions
+    constexpr auto function = "boost::math::pdf(landau<%1%>&, %1%)";
+    RealType result = 0;
+    RealType location = dist.location();
+    RealType scale = dist.scale();
+    RealType bias = dist.bias();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_x(function, x, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0> tag_type;
+
+    static_assert(tag_type::value, "The Landau distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (i.e. 128-bit quad-floats).");
+
+    RealType u = (x - location) / scale + bias;
+
+    result = landau_pdf_imp_prec(u, tag_type()) / scale;
+
+    return result;
+}
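+
+// Usage sketch (illustrative; the (location, scale) constructor is assumed
+// from the dist.location() / dist.scale() accessors used above):
+//
+//   boost::math::landau_distribution<double> dist(0.0, 1.0);
+//   double density = boost::math::pdf(dist, 1.5); // resolves to landau_pdf_imp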
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_cdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 1) {
+        // Rational Approximation
+        // Maximum Relative Error: 2.7348e-18
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(6.34761298487625202628e-1),
+            static_cast<RealType>(7.86558857265845597915e-1),
+            static_cast<RealType>(4.30220871807399303399e-1),
+            static_cast<RealType>(1.26410946316538340541e-1),
+            static_cast<RealType>(2.09346669713191648490e-2),
+            static_cast<RealType>(1.48926177023501002834e-3),
+            static_cast<RealType>(-5.93750588554108593271e-7),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(1.65227304522196452589e0),
+            static_cast<RealType>(1.29276828719607419526e0),
+            static_cast<RealType>(5.93815051307098615300e-1),
+            static_cast<RealType>(1.69165968013666952456e-1),
+            static_cast<RealType>(2.84272940328510367574e-2),
+            static_cast<RealType>(2.28001970477820696422e-3),
+        };
+
+        result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x);
+    }
+    else if (x < 2) {
+        RealType t = x - 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.1487e-17
+        BOOST_MATH_STATIC const RealType P[6] = {
+            static_cast<RealType>(4.22133240358047652363e-1),
+            static_cast<RealType>(3.48421126689016131480e-1),
+            static_cast<RealType>(1.15402429637790321091e-1),
+            static_cast<RealType>(1.90374044978864005061e-2),
+            static_cast<RealType>(1.26628667888851698698e-3),
+            static_cast<RealType>(-5.75103242931559285281e-7),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(1.21277435324167238159e0),
+            static_cast<RealType>(6.38324046905267845243e-1),
+            static_cast<RealType>(1.81723381692749892660e-1),
+            static_cast<RealType>(2.80457012073363245106e-2),
+            static_cast<RealType>(1.93749385908189487538e-3),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 4) {
+        RealType t = x - 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.2975e-17
+        BOOST_MATH_STATIC const RealType P[6] = {
+            static_cast<RealType>(2.95892137955791216378e-1),
+            static_cast<RealType>(2.29083899043580095868e-1),
+            static_cast<RealType>(7.09374171394372356009e-2),
+            static_cast<RealType>(1.08774274442674552229e-2),
+            static_cast<RealType>(7.69674715320139398655e-4),
+            static_cast<RealType>(1.63486840000680408991e-5),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(1.09704883482087441931e0),
+            static_cast<RealType>(5.10139057077147935327e-1),
+            static_cast<RealType>(1.27055234007499238241e-1),
+            static_cast<RealType>(1.74542139987310825683e-2),
+            static_cast<RealType>(1.18944143641885993718e-3),
+            static_cast<RealType>(2.55296292914537992309e-5),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 8) {
+        RealType t = x - 4;
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.6740e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(1.73159318667565938776e-1),
+            static_cast<RealType>(6.95847424776057206679e-2),
+            static_cast<RealType>(1.04513924567165899506e-2),
+            static_cast<RealType>(6.35094718543965631442e-4),
+            static_cast<RealType>(1.04166111154771164657e-5),
+            static_cast<RealType>(1.43633490646363733467e-9),
+            static_cast<RealType>(-4.55493341295654514558e-11),
+            static_cast<RealType>(6.71119091495929467041e-13),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(6.23409270429130114247e-1),
+            static_cast<RealType>(1.54791925441839372663e-1),
+            static_cast<RealType>(1.85626981728559445893e-2),
+            static_cast<RealType>(1.01414235673220405086e-3),
+            static_cast<RealType>(1.63385654535791481980e-5),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 16) {
+        RealType t = x - 8;
+
+        // Rational Approximation
+        // Maximum Relative Error: 7.6772e-18
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(8.90469147411748292410e-2),
+            static_cast<RealType>(2.76033447621178662228e-2),
+            static_cast<RealType>(3.26577485081539607943e-3),
+            static_cast<RealType>(1.77755752909150255339e-4),
+            static_cast<RealType>(4.20716551767396206445e-6),
+            static_cast<RealType>(3.19415703637929092564e-8),
+            static_cast<RealType>(-1.79900915228302845362e-13),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(4.36499987260915480890e-1),
+            static_cast<RealType>(7.67544181756713372678e-2),
+            static_cast<RealType>(6.83535263652329633233e-3),
+            static_cast<RealType>(3.15983778969051850073e-4),
+            static_cast<RealType>(6.84144567273078698399e-6),
+            static_cast<RealType>(5.00300197147417963939e-8),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 32) {
+        RealType t = x - 16;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.5678e-20
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(4.35157264931262089762e-2),
+            static_cast<RealType>(8.46833474333913742597e-3),
+            static_cast<RealType>(6.43769318301002170686e-4),
+            static_cast<RealType>(2.39440197089740502223e-5),
+            static_cast<RealType>(4.45572968892675484685e-7),
+            static_cast<RealType>(3.76071815793351687179e-9),
+            static_cast<RealType>(1.04851094362145160445e-11),
+            static_cast<RealType>(-8.50646541795105885254e-18),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(2.59832721225510968607e-1),
+            static_cast<RealType>(2.75929030381330309762e-2),
+            static_cast<RealType>(1.53115657043391090526e-3),
+            static_cast<RealType>(4.70173086825204710446e-5),
+            static_cast<RealType>(7.76185172490852556883e-7),
+            static_cast<RealType>(6.10512879655564540102e-9),
+            static_cast<RealType>(1.64522607881748812093e-11),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 64) {
+        RealType t = x - 32;
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.2534e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(2.11253031965493064317e-2),
+            static_cast<RealType>(1.36656844320536022509e-3),
+            static_cast<RealType>(2.99036224749763963099e-5),
+            static_cast<RealType>(2.54538665523638998222e-7),
+            static_cast<RealType>(6.79286608893558228264e-10),
+            static_cast<RealType>(-6.92803349600061706079e-16),
+            static_cast<RealType>(5.47233092767314029032e-19),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(9.71506209641408410168e-2),
+            static_cast<RealType>(3.52744690483830496158e-3),
+            static_cast<RealType>(5.85142319429623560735e-5),
+            static_cast<RealType>(4.29686638196055795330e-7),
+            static_cast<RealType>(1.06586221304077993137e-9),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(x) < 8) {
+        RealType t = log2(ldexp(x, -6));
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.8057e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(6.60754766433212615409e-1),
+            static_cast<RealType>(2.47190065739055522599e-1),
+            static_cast<RealType>(4.17560046901040308267e-2),
+            static_cast<RealType>(3.71520821873148657971e-3),
+            static_cast<RealType>(2.03659383008528656781e-4),
+            static_cast<RealType>(2.52070598577347523483e-6),
+            static_cast<RealType>(-1.63741595848354479992e-8),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(3.92836792184266080580e-1),
+            static_cast<RealType>(6.64332913820571574875e-2),
+            static_cast<RealType>(5.59456053716889879620e-3),
+            static_cast<RealType>(3.44201583106671507027e-4),
+            static_cast<RealType>(2.74554105716911980435e-6),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x);
+    }
+    else if (ilogb(x) < 16) {
+        RealType t = log2(ldexp(x, -8));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.5585e-18
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(6.44802371584831601817e-1),
+            static_cast<RealType>(2.74177359656349204309e-1),
+            static_cast<RealType>(5.53659240731871433983e-2),
+            static_cast<RealType>(6.97653365560511851744e-3),
+            static_cast<RealType>(6.17058143529799037402e-4),
+            static_cast<RealType>(3.94979574476108021136e-5),
+            static_cast<RealType>(1.88315864113369221822e-6),
+            static_cast<RealType>(6.10941845734962836501e-8),
+            static_cast<RealType>(1.39403332890347813312e-9),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(4.32345127287830884682e-1),
+            static_cast<RealType>(8.70500634789942065799e-2),
+            static_cast<RealType>(1.09253956356393590470e-2),
+            static_cast<RealType>(9.72576825490118007977e-4),
+            static_cast<RealType>(6.18656322285414147985e-5),
+            static_cast<RealType>(2.96375876501823390564e-6),
+            static_cast<RealType>(9.58622809886777038970e-8),
+            static_cast<RealType>(2.19059124630695181004e-9),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x);
+    }
+    else if (ilogb(x) < 32) {
+        RealType t = log2(ldexp(x, -16));
+
+        // Rational Approximation
+        // Maximum Relative Error: 8.4773e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(6.36685748306554972132e-1),
+            static_cast<RealType>(2.22217783148381285219e-1),
+            static_cast<RealType>(3.79173960692559280353e-2),
+            static_cast<RealType>(4.13394722917837684942e-3),
+            static_cast<RealType>(3.18141233442663766089e-4),
+            static_cast<RealType>(1.79745613243740552736e-5),
+            static_cast<RealType>(7.47632665728046334131e-7),
+            static_cast<RealType>(2.18258684729250152138e-8),
+            static_cast<RealType>(3.93038365129320422968e-10),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(3.49087806008685701060e-1),
+            static_cast<RealType>(5.95568283529034601477e-2),
+            static_cast<RealType>(6.49386742119035055908e-3),
+            static_cast<RealType>(4.99721374204563274865e-4),
+            static_cast<RealType>(2.82348248031305043777e-5),
+            static_cast<RealType>(1.17436903872210815656e-6),
+            static_cast<RealType>(3.42841159307801319359e-8),
+            static_cast<RealType>(6.17382517100568714012e-10),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x);
+    }
+    else if (ilogb(x) < 64) {
+        RealType t = log2(ldexp(x, -32));
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.1441e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(6.36619774420718062663e-1),
+            static_cast<RealType>(2.68594096777677177874e-1),
+            static_cast<RealType>(5.50713044649497737064e-2),
+            static_cast<RealType>(7.26574134143434960446e-3),
+            static_cast<RealType>(6.89173530168387629057e-4),
+            static_cast<RealType>(4.87688310559244353811e-5),
+            static_cast<RealType>(2.84218580121660744969e-6),
+            static_cast<RealType>(9.65240367429172366675e-8),
+            static_cast<RealType>(5.21722720068664704240e-9),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(4.21906621389193043384e-1),
+            static_cast<RealType>(8.65058026826346828750e-2),
+            static_cast<RealType>(1.14129998157398060009e-2),
+            static_cast<RealType>(1.08255124950652385121e-3),
+            static_cast<RealType>(7.66059006900869004871e-5),
+            static_cast<RealType>(4.46449501653114622960e-6),
+            static_cast<RealType>(1.51619602364037777665e-7),
+            static_cast<RealType>(8.19520132288940649002e-9),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x);
+    }
+    else {
+        result = 2 / (constants::pi<RealType>() * x);
+    }
+
+    return result;
+}
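+
+// For the largest arguments the branch above returns 2 / (pi * x), which is
+// the integral from x to infinity of the density's leading tail term
+// 2 / (pi * t^2); this is consistent with landau_cdf_plus_imp_prec carrying
+// the upper-tail (complementary) probability.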
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_cdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 1) {
+        // Rational Approximation
+        // Maximum Relative Error: 2.6472e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.34761298487625202628055609797763667089e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.67589195401255255724121983550745957195e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.07502511824371206858547365520593277966e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.58354381655514028012912292026393699991e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.26470588572701739953294573496059174764e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.09494168186680012705692462031819276746e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.47385718073281027400744626077865581325e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.69107567947502492044754464589464306928e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.39641345689672620514703813504927833352e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.27003930699448633502508661352994055898e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26124673422692247711088651516214728305e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.92103390710025598612731036700549416611e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.49572523814120679048097861755172556652e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.50719933268462244255954307285373705456e-13),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.05332427324361912631483249892199461926e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.46280417679002004953145547112352398783e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.10429833573651169023447466152999802738e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.63535585818618617796313647799029559407e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24103322502244219003850826414302390557e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.38359438431541204276767900393091886363e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16687583686405832820912406970664239423e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.31451667102532056871497958974899742424e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31646175307279119467894327494418625431e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.22334681489114534492425036698050444462e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.86326948577818727263376488455223120476e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.53867591038308710930446815360572461884e-7),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x);
+    }
+    else if (x < 2) {
+        RealType t = x - 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.2387e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[12] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.22133240358047652363270514524313049653e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.35860518549481281929441026718420080571e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.89900189271177970319691370395978805326e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.84682995288088652145572170736339265315e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.10748045562955323875797887939420022326e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.00246325517647746481631710824413702051e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.02998394686245118431020407235000441722e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.06095284318730009040434594746639110387e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.91754425158654496372516241124447726889e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.37288564874584819097890713305968351561e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.77487285800889132325390488044487626942e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.41654614425073025870130302460301244273e-13),
+        };
+        BOOST_MATH_STATIC const RealType Q[12] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13058739144695658589427075788960660400e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11792528400843967390452475642793635419e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28794756779085737559146475126886069030e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.29339015472607099189295465796550367819e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.53434372685847620864540166752049026834e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.17610372643685730837081191600424913542e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.64455304425865128680681864919048610730e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.62357689170951502920019033576939977973e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.89912258835489782923345357128779660633e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.76323449710934127736624596886862488066e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21524231900555452527639738371019517044e-8),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 4) {
+        RealType t = x - 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.2281e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.95892137955791216377776422765473500279e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.65957634570689820998348206103212047458e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.34657686985192350529330481818991619730e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43985500841002490334046057189458709493e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.09876223028004323158413173719329449720e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.04194660038290410425299531094974709019e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09780604136364125990393172827373829860e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02676079027875648517286351062161581740e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.30298199082321832830328345832636435982e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.33633965123855006982811143987691483957e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.46384114966020719170903077536685621119e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.07058773850795175564735754911699285828e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.76765053309825506619419451346428518606e-16),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.89758965744489334954041814073547951925e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.65985298582650601001220682594742473012e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.81017086203232617734714711306180675445e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.14481301672800918591822984940714490526e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.90605450026850685321372623938646722657e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.42447999818015246265718131846902731574e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83426770079526980292392341278413549820e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.65073357441521690641768959521412898756e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.90437453546925074707222505750595530773e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.63458145595422196447107547750737429872e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.94457070577990681786301801930765271001e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.80986568964737305842778359322566801845e-11),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 8) {
+        RealType t = x - 4;
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.3269e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.73159318667565938775602634998889798568e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.95149372103869634275319490207451722385e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.87504411659823400690797222216564651939e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.94571385159717824767058200278511014560e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.71656210265434934399632978675652106638e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.51248899957476233641240573020681464290e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.74490600490886011190565727721143414249e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.07537323853509621126318424069471060527e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59167561354023258538869598891502822922e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.72608361427131857269675430568328018022e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.54143016370650707528704927655983490119e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.07936446902207128577031566135957311260e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.73506415766100115673754920344659223382e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66918494546396383814682000746818494148e-21),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34854858486201481385140426291984169791e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.25379978655428608198799717171321453517e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.01905621587554903438286661709763596137e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.31293339647901753103699339801273898688e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22793491714510746538048140924864505813e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45360205736839126407568005196865547577e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20918479556021574336548106785887700883e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.91450617548036413606169102407934734864e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.59940586452863361281618661053014404930e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.93918243796178165623395356401173295690e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.14578198844767847381800490360878776998e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.25951924258762195043744665124187621023e-13),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 16) {
+        RealType t = x - 8;
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.8719e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.90469147411748292410422813492550092930e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.20196598836093298098360769875443462143e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92579652651763461802771336515384878994e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.50439147419887323351995227585244144060e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.13214742069751393867851080954754449610e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.29648648382394801501422003194522139519e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.80420399625810952886117129805960917210e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.73059844436212109742132138573157222143e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66835461298243901306176013397428732836e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.61808423521250921041207160217989047728e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.39300098366988229510997966682317724011e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09447823064238788960158765421669935819e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76962456941948786610101052244821659252e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.56004343709960620209823076030906442732e-25),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.22996422556023111037354479836605618488e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.05244279198013248402385148537421114680e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.72477335169177427114629223821992187549e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.76404568980852320252614006021707040788e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.79959793426748071158513573279263946303e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.93524788220877416643145672816678561612e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.46692111397574773931528693806744007042e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20764040991846422990601664181377937629e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.84452460717254884659858711994943474216e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.30317379590981344496250492107505244036e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.83736710938966780518785861828424593249e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.72943283576264035508862984899450025895e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.77791087299927741360821362607419036797e-18),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 32) {
+        RealType t = x - 16;
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.3269e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.35157264931262089761621934621402648954e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.51407493866635569361305338029611888082e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30886132894858313459359493329266696766e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType,
113, 2.02488735053241778868198537544867092626e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12676055870976203566712705442945186614e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.14136757304740001515364737551021389293e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01514327671186735593984375829685709678e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63783530594707477852365258482782354261e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.67623013776194044717097141295482922572e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01397549144502050693284434189497148608e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.16720246008161901837639496002941412533e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.70776057051329137176494230292143483874e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.02838174509144355795908173352005717435e-26), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.07343581702278433243268463675468320030e-30), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.13166129191902183515154099741529804400e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.58587877615076239769720197025023333190e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.14619910799508944167306046977187889556e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.66671218029939293563302720748492945618e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.67192565058098643223751044962155343554e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.68397391192695060767615969382391508636e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.94009678375859797198831431154760916459e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.91901267471125881702216121486397689200e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.83697721782125852878533856266722593909e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.65409094893730117412328297801448869154e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.04202174160401885595563150562438901685e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.82104711207466136473754349696286794448e-20), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 64) { + RealType t = x - 32; + + // Rational Approximation + // Maximum Relative Error: 1.0937e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11253031965493064317003259449214452745e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.66886306590939856622089350675801752704e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.77786526684921036345823450504680078696e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20343424607276252128027697088363135591e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.29196073776799916444272401212341853981e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.92132293422644089278551376756604946339e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.07465707745270914645735055945940815947e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.54424785613626844024154493717770471131e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74829439628215654062512023453584521531e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.03470347880592072854295353687395319489e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21113051919776165865529140783521696702e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.83719812218384126931626509884648891889e-24), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.41009036423458926116066353864843586169e-31), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
1.04965807080681693416200699806159303323e-34), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06133417626680943824361625182288165823e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.87991711814130682492211639336942588926e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.98342969282034680444232201546039059255e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.41940454945139684365514171982891170420e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.90481418770909949109210069475433304086e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.25451856391453896652473393039014954572e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36349120987010174609224867075354225138e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.94716367816033715208164909918572061643e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.67466470065187852967064897686894407151e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31509787633232139845762764472649607555e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.93147765040455324545205202900563337981e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07370974123835247210519262324524537634e-23), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (ilogb(x) < 8) { + RealType t = log2(ldexp(x, -6)); + + // Rational Approximation + // Maximum Relative Error: 3.1671e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.60754766433212615408805486898847664740e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.76143516602438873568296501921670869526e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.25254763859315398784817302471631188095e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.58650277225655302085863010927524053686e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92227773746592457803942136197158658110e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.77170481512334811333255898903061802339e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.97864282716826576471164657368231427231e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.44243747123065035356982629201975914275e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.54592817957461998135980337838429682406e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.52110831321633404722419425039513444319e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75030698219998735693228347424295850790e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.08894662488905377548940479566994482806e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11472306961184868827300852021969296872e-12), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04202386226609593823214781180612848612e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.07648212684952405730772649955008739292e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50646784687432427178774105515508540021e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.02521400964223268224629095722841793118e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.35374201758795213489427690294679848997e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.67290684433876221744005507243460683585e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.83834977086311601362115427826807705185e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.43236252790815493406777552261402865674e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17693398496807851224497995174884274919e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
4.34161978291568722756523120609497435933e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10819003833429876218381886615930538464e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.75674945131892236663189757353419870796e-12), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x); + } + else if (ilogb(x) < 16) { + RealType t = log2(ldexp(x, -8)); + + // Rational Approximation + // Maximum Relative Error: 6.8517e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.44802371584831601817146389426921705500e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.32962058761590152378007743852342151897e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.93461601407042255925193793376118641680e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.55533612685775705468614711945893908392e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76358919439168503100357154639460097607e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74131615534562303144125602950691629908e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.34470363614899824502654995633001232079e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.29020592733459891982428815398092077306e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65794017754267756566941255128608603072e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78550878208007836345763926019855723350e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00524022519953193863682806155339574713e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.75977976583947697667784048133959750133e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.89287460618943500291479647438555099783e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.27647727947590174240069836749437647626e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70582660582766959108625375415057711766e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.68499175244574169768386088971844067765e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.84493774639724473576782806157757824413e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.03526708437207438952843827018631758857e-20), + }; + BOOST_MATH_STATIC const RealType Q[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.88770963972332750838571146142568699263e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.60273143287476497658795203149608758815e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34393017137025732376113353720493995469e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.77086140458900002000076127880391602253e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.30613589576665986239534705717153313682e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.25355055770024448240128702278455001334e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.16820392686312531160900884133254461634e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.17518073757769640772428097588524967431e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.80469112215756035261419102003591533407e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57898123952200478396366475124854317231e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.47675840885248141425130440389244781221e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.97339213584115778189141444065113447170e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.85845602624148484344802432304264064957e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.67971825826765902713812866354682255811e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.78795543918651402032912195982033010270e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 
113, 9.18166403020940538730241286150437447698e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.47995312365747437038996228794650773820e-20), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x); + } + else if (ilogb(x) < 32) { + RealType t = log2(ldexp(x, -16)); + + // Rational Approximation + // Maximum Relative Error: 6.5315e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36685748306554972131586673701426039950e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75892450098649456865500477195142009984e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.77167300709199375935767980419262418694e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.76657987434662206916119089733639111866e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.55003354250569146980730594644539195376e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.64102805555049236216024194001407792885e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.36247488122195469059567496833809879653e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63710218182036673197103906176200862606e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.60629446465979003842091012679929186607e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22712292003775206105713577811447961965e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.28539646473359376707298867613704501434e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47893806904387088760579412952474847897e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.86809622035928392542821045232270554753e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.47935154807866802001012566914901169147e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.40623855123515207599160827187101517978e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.60738876249485914019826585865464103800e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29257874466803586327275841282905821499e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.06781616867813418930916928811492801723e-31), + }; + BOOST_MATH_STATIC const RealType Q[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.33391038950576592915531240096703257292e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.06598147816889621749840662500099582486e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21997213397347849640608088189055469954e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18596259982438670449688554459343971428e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.86085649929528605647139297483281849158e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.28178198640304770056854166079598253406e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.57155234493390297220982397633114062827e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.03771216318243964850930846579433365529e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.49837117625052865973189772546210716556e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.87302306206908338457432167186661027909e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.32312467403555290915110093627622951484e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.50516769529388616534895145258103120804e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.03618834064000582669276279973634033592e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.49205787058220657972891114812453768100e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.23730041323226753771240724078738146658e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.60115971929371066362271909482282503973e-23), + }; + // LCOV_EXCL_STOP + 
result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x); + } + else if (ilogb(x) < 64) { + RealType t = log2(ldexp(x, -32)); + + // Rational Approximation + // Maximum Relative Error: 1.0538e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[19] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36619774420718062663274858007687066488e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30268560944740805268408378762250557522e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.09208091036436297425427953080968023835e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.74943166696408995577495065480328455423e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.03759498310898586326086395411203400316e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.67261192787197720215143001944093963953e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.42934578939412238889174695091726883834e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.32436794923711934610724023467723195718e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35077626369701051583611707128788137675e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.40836846523442062397035620402082560833e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.98783806012035285862106614557391807137e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.53869567415427145730376778932236900838e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.76521311340629419738016523643187305675e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41298928566351198899106243930173421965e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85552706372195482059144049293491755419e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.89780987301820615664133438159710338126e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37965575161090804572561349091024723962e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.58944184323201470938493323680744408698e-26), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.35050162268451658064792430214910233545e-40), + }; + BOOST_MATH_STATIC const RealType Q[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.61705010674524952791495931314010679992e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.42782564900556152436041716057503104160e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.46038982970912591009739894441944631471e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.34223936001873800295785537132905986678e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.19812900355867749521882613003222797586e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.24521111398180921205229795007228494287e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.93429394949368809594897465724934596442e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.69259071867341718986156650672535675726e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16370379759046264903196063336023488714e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.12248872253003553623419554868303473929e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.12936649424676303532477421399492615666e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37683645610869385656713212194971883914e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21951837982136344238516771475869548147e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.91465509588513270823718962232280739302e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.98107277753797219142748868489983891831e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.16715818685246698314459625236675887448e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.77987471623502330881961633434056523159e-26), + }; + // 
LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x); + } + else if (ilogb(x) < 128) { + RealType t = log2(ldexp(x, -64)); + + // Rational Approximation + // Maximum Relative Error: 2.2309e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36619772367581344040890134127619524371e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72522424358877592972375801826826390634e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.74749058021341871895402838175268752603e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.95136532385982168410320513292144834602e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.10500792867575154180588502397506694341e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.48101840822895419487033057691746982216e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.22577211783918426674527460572438843266e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30499751609793470641331626931224574780e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07043764292750472900578756659402327450e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.28345765853059515246787820662932506931e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48838700086419232178247558529254516870e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.51015062047870581993810118835353083110e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19087584836023628483830612541904830502e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74405125338538967114280887107628943111e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.34493122865874905104884954420903910585e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.33794243240353561095702650271950891264e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07931799710240978706633227327649731325e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60935135769719557933955672887720342220e-23), + }; + BOOST_MATH_STATIC const RealType Q[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.28077223152164982690351137450174450926e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.02813709168750724641877726632095676090e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24899754437233214579860634420198464016e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27313166830073839667108783881090842820e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01803599095361490387188839658640162684e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.63782732057408167492988043000134055952e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.62068163155799642595981598061154626504e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.68143951757351157612001096085234448512e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.72843955600132743549395255544732133507e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.33795283380674591910584657171640729449e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.08452802793967494363851669977089389376e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.87062340827301546650149031133192913586e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.31034562935470222182311379138749593572e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.39579834094849668082821388907704276985e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46680056726416759571957577951115309094e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.69538874529209016624246362786229032706e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.52796320119313458991885552944744518437e-23), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / 
(tools::evaluate_polynomial(Q, t) * x);
+    }
+    else {
+        result = 2 / (constants::pi<RealType>() * x);
+    }
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_cdf_minus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x >= -1) {
+        RealType t = x + 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.8279e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(9.61609610406317335842e-2),
+            static_cast<RealType>(3.91836314722738553695e-1),
+            static_cast<RealType>(6.79862925205625107133e-1),
+            static_cast<RealType>(6.52516594941817706368e-1),
+            static_cast<RealType>(3.78594163612581127974e-1),
+            static_cast<RealType>(1.37741592243008345389e-1),
+            static_cast<RealType>(3.16100502353317199197e-2),
+            static_cast<RealType>(3.94935603975622336575e-3),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(1.76863983252615276767e0),
+            static_cast<RealType>(1.81486018095087241378e0),
+            static_cast<RealType>(1.17295504548962999723e0),
+            static_cast<RealType>(5.33998066342362562313e-1),
+            static_cast<RealType>(1.66508320794082632235e-1),
+            static_cast<RealType>(3.42192028846565504290e-2),
+            static_cast<RealType>(3.94691613177524994796e-3),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x >= -2) {
+        RealType t = x + 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.3675e-17
+        BOOST_MATH_STATIC const RealType P[11] = {
+            static_cast<RealType>(7.07114056489178077423e-4),
+            static_cast<RealType>(7.35277969197058909845e-3),
+            static_cast<RealType>(3.45402694579204809691e-2),
+            static_cast<RealType>(9.62849773112695332289e-2),
+            static_cast<RealType>(1.75738736725818007992e-1),
+            static_cast<RealType>(2.18309266582058485951e-1),
+            static_cast<RealType>(1.85680388782727289455e-1),
+            static_cast<RealType>(1.06177394398691169291e-1),
+            static_cast<RealType>(3.94880388335722224211e-2),
+            static_cast<RealType>(9.46543177731050647162e-3),
+            static_cast<RealType>(1.50949646857411896396e-3),
+        };
+        BOOST_MATH_STATIC const RealType Q[11] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(1.19520021153535414164e0),
+            static_cast<RealType>(2.24057032777744601624e0),
+            static_cast<RealType>(1.63635577968560162720e0),
+            static_cast<RealType>(1.58952087228427876880e0),
+            static_cast<RealType>(7.63062254749311648018e-1),
+            static_cast<RealType>(4.65805990343825931327e-1),
+            static_cast<RealType>(1.45821531714775598887e-1),
+            static_cast<RealType>(5.42393925507104531351e-2),
+            static_cast<RealType>(9.84276292481407168381e-3),
+            static_cast<RealType>(1.54787649925009672534e-3),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        const static RealType lambda_bias = static_cast<RealType>(1.45158270528945486473); // (= log(pi/2)+1)
+
+        RealType sigma = exp(-x * constants::pi<RealType>() / 2 - lambda_bias);
+        RealType s = exp(-sigma) / sqrt(sigma);
+
+        if (x >= -4) {
+            RealType t = -x - 2;
+
+            // Rational Approximation
+            // Maximum Relative Error: 6.6532e-17
+            BOOST_MATH_STATIC const RealType P[9] = {
+                static_cast<RealType>(3.71658823632747235572e-1),
+                static_cast<RealType>(2.81493346318174084721e-1),
+                static_cast<RealType>(1.80052521696460721846e-1),
+                static_cast<RealType>(7.65907659636944822120e-2),
+                static_cast<RealType>(2.33352148213280934280e-2),
+                static_cast<RealType>(5.02308701022480574067e-3),
+                static_cast<RealType>(6.29239919421134075502e-4),
+                static_cast<RealType>(8.36993181707604609065e-6),
+                static_cast<RealType>(-8.38295154747385945293e-6),
+            };
+            BOOST_MATH_STATIC const RealType Q[9] = {
+                static_cast<RealType>(1),
+                static_cast<RealType>(6.62107509936390708604e-1),
+                static_cast<RealType>(4.72501892305147483696e-1),
+                static_cast<RealType>(1.84446743813050604353e-1),
+                static_cast<RealType>(5.99971792581573339487e-2),
+                static_cast<RealType>(1.24751029844082800143e-2),
+                static_cast<RealType>(1.56705297654475773870e-3),
+                static_cast<RealType>(2.36392472352050487445e-5),
+                static_cast<RealType>(-2.11667044716450080820e-5),
+            };
+
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else if (x >= -5.1328125) {
+            RealType t = -x - 4;
+
+            // Rational Approximation
+            // Maximum Relative Error: 2.6331e-17
+            BOOST_MATH_STATIC const RealType P[10] = {
+                static_cast<RealType>(3.97500903816385095134e-1),
+                static_cast<RealType>(5.08559630146730380854e-1),
+                static_cast<RealType>(2.99190443368166803486e-1),
+                static_cast<RealType>(1.07339363365158174786e-1),
+                static_cast<RealType>(2.61694301269384158162e-2),
+                static_cast<RealType>(4.58386867966451237870e-3),
+                static_cast<RealType>(5.80610284231484509069e-4),
+                static_cast<RealType>(5.07249042503156949021e-5),
+                static_cast<RealType>(2.91644292826084281875e-6),
+                static_cast<RealType>(9.75453868235609527534e-12),
+            };
+            BOOST_MATH_STATIC const RealType Q[9] = {
+                static_cast<RealType>(1),
+                static_cast<RealType>(1.27376091725485414303e0),
+                static_cast<RealType>(7.49829208702328578188e-1),
+                static_cast<RealType>(2.69157374996960976399e-1),
+                static_cast<RealType>(6.55795320040378662663e-2),
+                static_cast<RealType>(1.14912646428788757804e-2),
+                static_cast<RealType>(1.45541420582309879973e-3),
+                static_cast<RealType>(1.27135040794481871472e-4),
+                static_cast<RealType>(7.31138551538712031061e-6),
+            };
+
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else {
+            result = 0;
+        }
+    }
+
+    return result;
+}
+
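For reference, the tail regimes outside the rational-approximation windows reduce to closed forms that can be read directly off the branches above (the quad-precision twin below uses the same scaffolding), with sigma and lambda_bias exactly as defined in the code and t the per-branch shifted argument:

```latex
% Right tail: closing branch of landau_cdf_plus_imp_prec
1 - F(x) \;\longrightarrow\; \frac{2}{\pi x}, \qquad x \to +\infty
% Left tail: prefactor used by landau_cdf_minus_imp_prec
\sigma(x) = \exp\!\left(-\frac{\pi x}{2} - 1 - \ln\frac{\pi}{2}\right),
\qquad F(x) \;\approx\; \frac{e^{-\sigma}}{\sqrt{\sigma}} \cdot \frac{P(t)}{Q(t)}
```

Since sigma grows exponentially as x goes to minus infinity, F decays double-exponentially there, which is why the final branch can return 0 outright at this precision.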
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_cdf_minus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x >= -1) {
+        RealType t = x + 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.2055e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[16] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.61609610406317335842332400044553397267e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.74152295981095898203847178356629061821e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.58642905042588731020840168744866124345e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.69370085525311304330141932309908104187e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.14888713497930800611167630826754270499e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.69123861559106636252620023643265102867e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74273532954853421626852458737661546439e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.73534665976007761924923962996725209700e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.42543389723715037640714282663089570985e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05120903211852044362181935724880384488e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.49586169587615171270941258051088627885e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.46047939521303565932576405363107506886e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.68248726161641913236972878212857788320e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.60663638253775180681171554635861859625e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76463460016745893121574217030494989443e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.08380585744336744543979680558024295296e-12),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.66458574743150749245922924142120646408e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.87010262350733534202724862784081296105e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.51107149980251214963849267707173045433e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.71158207369578457239679595370389431171e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.37188705505573668092513124472448362633e0),
+
BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.95647530096628718695081507038921183627e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30278895428001081342301218278371140110e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.61322060563420594659487640090297303892e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30529729106312748824241317854740876915e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.90465740298431311519387111139787971960e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.92760416706194729215037805873466599319e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.02070496615845146626690561655353212151e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72080705566714681586449384371609107346e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76433504120625478720883079263866245392e-6), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -2) { + RealType t = x + 2; + + // Rational Approximation + // Maximum Relative Error: 3.4133e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[19] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.07114056489178077422539043012078031613e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.18006784954579394004360967455655021959e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.60309646092161147676756546417366564213e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13479499932401667065782086621368143322e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.68587439643060549883916236839613331692e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12366494749830793876926914920462629077e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70739124754664545339208363069646589169e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.04073482998938337661285862393345731336e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.94833709787596305918524943438549684109e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50214412821697972546222929550410139790e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.43105005523280337071698704765973602884e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.85396789833278250392015217207198739243e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.05690359993570736607428746439280858381e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.17297815188944531843360083791153470475e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.03913601629627587800587620822216769010e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.61963034255210565218722882961703473760e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.99502258440875586452963094474829571000e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.66563884565518965562535171848480872267e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.23954896921292896539048530795544784261e-6), + }; + BOOST_MATH_STATIC const RealType Q[19] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.77934931846682015134812629288297137499e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.85052416252910403272283619201501701345e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45276747409453182009917448097687214033e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.87717215449690275562288513806049961791e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96583424263422661540930513525639950307e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.73001838976297286477856104855182595364e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29209801725936746054703603946844929105e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.31809396176316042818100839595926947461e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
4.62125101720695030847208519302530333864e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22912823173107974750307098204717046200e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.28404310708078592866397210871397836013e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.33433860799478110495440617696667578486e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01779942752411055394079990371203135494e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.60870827161929649807734240735205100749e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.43275518144078080917466090587075581039e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.80287554756375373913082969626543154342e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00697535360590561244468004025972321465e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.23883308105457761862174623664449205327e-6), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else { + const static RealType lambda_bias = BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.451582705289454864726195229894882143572); // (= log(pi/2)+1) + + RealType sigma = exp(-x * constants::pi() / 2 - lambda_bias); + RealType s = exp(-sigma) / sqrt(sigma); + + if (x >= -4) { + RealType t = -x - 2; + + // Rational Approximation + // Maximum Relative Error: 9.2619e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[19] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.71658823632747235572391863987803415545e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20402452680758356732340074285765302037e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.53870483364594487885882489517365212394e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.73525449564340671962525942038149851804e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.67339872142847248852186397385576389802e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.60644488744851390946293970736919678433e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33042051950636491987775324999025538357e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.13846819893538329440033115143593487041e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41648498082970622389678372669789346515e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.74006867625631068946791714035394785978e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.12238896415831258936563475509362795783e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88070293465108791701905953972140154151e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.24813015654516014181209691083399092303e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.64092873079064926551281731026589848877e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.09892207654972883190432072151353819511e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.86990125202059013860642688739159455800e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62986611607135348214220687891374676368e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07567013469555215514702758084138467446e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.84619752008414239602732630339626773669e-14), + }; + BOOST_MATH_STATIC const RealType Q[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.28669950018285475182750690468224641923e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.12421557061005325313661189943328446480e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.68376064122323574208976258468929505299e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22010354939562426718305463635398985290e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
5.13795955314742199207524303721722785075e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.90452274425830801819532524004271355513e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.38324283887272345859359008873739301544e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.15232844484261129757743512155821350773e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.79562237779621711674853020864686436450e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.64370777996591099856555782918006739330e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.02327782881305686529414731684464770990e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.27181015755595543140221119020333695667e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01121287947061613072815935956604529157e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44038164966032378909755215752715620878e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.39138685106442954199109662617641745618e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.83317957765031605023198891326325990178e-10), + }; + // LCOV_EXCL_STOP + result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -6.875) { + RealType t = -x - 4; + + // Rational Approximation + // Maximum Relative Error: 4.9208e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[20] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.97500903816385095134217223320239082420e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02058997410109156148729828665298333233e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30492992901887465108077581566548743407e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08695332228530157560495896731847709498e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.54469321766529692240388930552986490213e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.00543201281990041935310905273146022998e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08633547932070289660163851972658637916e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.15432691192536747268886307936712580254e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.46179071338871656505293487217938889935e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45295210106393905833273975344579255175e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.34638105523514101671944454719592801562e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.15786069528793080046638424661219527619e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.54781306296697568446848038567723598851e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31977279631544580423883461084970429143e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.56616743805004179430469197497030496870e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60913959062328670735884196858280987356e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.91123354712008822789348244888916948822e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.82453513391091361890763400931018529659e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.12671859603774617133607658779709622453e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.03211544596001317143519388487481133891e-20), + }; + BOOST_MATH_STATIC const RealType Q[19] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.56188463983858614833914386500628633184e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.27273165410457713017446497319550252691e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72495122287308474449946195751088057230e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.64049710819255633163836824600620426349e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
7.53329810455612298967902432399110414761e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72302144446588066369304547920758875106e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.90680157119357595265085115978578965640e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.87039785683949322939618337154059874729e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.64199530594973983893552925652598080310e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.88147828823178863054226159776600116931e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.91569503818223078110818909039307983575e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.89289385694964650198403071737653842880e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.32154679053642509246603754078168127853e-7),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.43239674842248090516375370051832849701e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.03349866320207008385913232167927124115e-9),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.98307302768178927108235662166752511325e-10),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.07996042577029996321821937863373306901e-12),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.53576500935979732855511826033727522138e-13),
+            };
+            // LCOV_EXCL_STOP
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else {
+            result = 0;
+        }
+    }
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_cdf_imp_prec(const RealType& x, bool complement, const boost::math::integral_constant<int, 53>& tag) {
+    if (x >= 0) {
+        return complement ? landau_cdf_plus_imp_prec(x, tag) : 1 - landau_cdf_plus_imp_prec(x, tag);
+    }
+    else if (x <= 0) {
+        return complement ? 1 - landau_cdf_minus_imp_prec(x, tag) : landau_cdf_minus_imp_prec(x, tag);
+    }
+    else {
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_cdf_imp_prec(const RealType& x, bool complement, const boost::math::integral_constant<int, 113>& tag) {
+    if (x >= 0) {
+        return complement ? landau_cdf_plus_imp_prec(x, tag) : 1 - landau_cdf_plus_imp_prec(x, tag);
+    }
+    else if (x <= 0) {
+        return complement ? 1 - landau_cdf_minus_imp_prec(x, tag) : landau_cdf_minus_imp_prec(x, tag);
+    }
+    else {
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
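The precision selection in the two overloads above is ordinary tag dispatch on an integral_constant; a self-contained sketch of the same mechanism, with hypothetical names standing in for the Boost.Math machinery:

```cpp
#include <iostream>
#include <limits>
#include <type_traits>

// Stand-ins for the precision-specific implementations, each of which
// would carry its own coefficient tables (names are illustrative only).
template <typename Real>
Real eval_impl(Real x, std::integral_constant<int, 53>)  { return x; }  // double-width tables
template <typename Real>
Real eval_impl(Real x, std::integral_constant<int, 113>) { return -x; } // quad-width tables

template <typename Real>
Real eval(Real x)
{
    // Pick the overload at compile time from the mantissa width.
    using tag = std::integral_constant<int,
        std::numeric_limits<Real>::digits <= 53 ? 53 : 113>;
    return eval_impl(x, tag{});
}

int main()
{
    std::cout << eval(1.0) << '\n';  // double: 53-bit overload
    std::cout << eval(1.0L) << '\n'; // x86 long double (64-bit mantissa): 113-bit overload
}
```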
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType landau_cdf_imp(const landau_distribution<RealType, Policy>& dist, const RealType& x, bool complement) {
+    //
+    // This calculates the cdf of the Landau distribution and/or its complement.
+    //
+
+    BOOST_MATH_STD_USING // for ADL of std functions
+    constexpr auto function = "boost::math::cdf(landau<%1%>&, %1%)";
+    RealType result = 0;
+    RealType location = dist.location();
+    RealType scale = dist.scale();
+    RealType bias = dist.bias();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_x(function, x, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Landau distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    RealType u = (x - location) / scale + bias;
+
+    result = landau_cdf_imp_prec(u, complement, tag_type());
+
+    return result;
+}
+
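To see the whole path exercised end to end, here is a sketch of how the public interface reaches landau_cdf_imp; it assumes the usual Boost.Math distribution conventions (cdf, complement) and the landau_distribution defaults of location 0 and scale 1:

```cpp
#include <boost/math/distributions/landau.hpp>
#include <iostream>

int main()
{
    boost::math::landau_distribution<double> dist; // location = 0, scale = 1 assumed

    // cdf(dist, x) calls landau_cdf_imp with complement == false.
    std::cout << boost::math::cdf(dist, 2.0) << '\n';

    // cdf(complement(dist, x)) sets complement == true, so the right tail
    // is evaluated directly by landau_cdf_plus_imp_prec rather than as
    // 1 - F(x), avoiding cancellation for large x.
    std::cout << boost::math::cdf(boost::math::complement(dist, 2.0)) << '\n';
}
```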
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_quantile_lower_imp_prec(const RealType& p, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (p >= 0.375) {
+        RealType t = p - static_cast<RealType>(0.375);
+
+        // Rational Approximation
+        // Maximum Absolute Error: 3.0596e-17
+        BOOST_MATH_STATIC const RealType P[6] = {
+            static_cast<RealType>(3.74557416577759554506e-2),
+            static_cast<RealType>(3.87808262376545756299e0),
+            static_cast<RealType>(4.03092288183382979104e0),
+            static_cast<RealType>(-1.65221829710249468257e1),
+            static_cast<RealType>(-6.99689838230114367276e0),
+            static_cast<RealType>(1.51123479911771488314e1),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(4.37863773851525662884e-1),
+            static_cast<RealType>(-6.35020262707816744534e0),
+            static_cast<RealType>(3.07646508389502660442e-1),
+            static_cast<RealType>(9.72566583784248877260e0),
+            static_cast<RealType>(-2.72338088170674280735e0),
+            static_cast<RealType>(-1.58608957980133006476e0),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.25) {
+        RealType t = p - static_cast<RealType>(0.25);
+
+        // Rational Approximation
+        // Maximum Absolute Error: 5.2780e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(-4.17764764050720190117e-1),
+            static_cast<RealType>(1.27887601021900963655e0),
+            static_cast<RealType>(1.80329928265996817279e1),
+            static_cast<RealType>(2.35783605878556791719e1),
+            static_cast<RealType>(-2.67160590411398800149e1),
+            static_cast<RealType>(-2.36192101013335692266e1),
+            static_cast<RealType>(8.30396110938939237358e0),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(5.37459525158081633669e0),
+            static_cast<RealType>(2.35696607501498012129e0),
+            static_cast<RealType>(-1.71117034150268575909e1),
+            static_cast<RealType>(-6.72278235529877170403e0),
+            static_cast<RealType>(1.27763043804603299034e1),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.125) {
+        RealType t = p - static_cast<RealType>(0.125);
+
+        // Rational Approximation
+        // Maximum Absolute Error: 6.3254e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-8.77109518013577785811e-1),
+            static_cast<RealType>(-1.03442936529923615496e1),
+            static_cast<RealType>(-1.03389868296950570121e1),
+            static_cast<RealType>(2.01575691867458616553e2),
+            static_cast<RealType>(4.59115079925618829199e2),
+            static_cast<RealType>(-3.38676271744958577802e2),
+            static_cast<RealType>(-5.38213647878547918506e2),
+            static_cast<RealType>(1.99214574934960143349e2),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(1.64177607733998839003e1),
+            static_cast<RealType>(8.10042194014991761178e1),
+            static_cast<RealType>(7.61952772645589839171e1),
+            static_cast<RealType>(-2.52698871224510918595e2),
+            static_cast<RealType>(-1.95365983250723202416e2),
+            static_cast<RealType>(2.61928845964255538379e2),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -4) {
+        RealType t = -log2(ldexp(p, 3));
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.5192e-18
+        BOOST_MATH_STATIC const RealType P[6] = {
+            static_cast<RealType>(-8.77109518013577852585e-1),
+            static_cast<RealType>(-1.08703720146608358678e0),
+            static_cast<RealType>(-4.34198537684719253325e-1),
+            static_cast<RealType>(-6.97264194535092564620e-2),
+            static_cast<RealType>(-4.20721933993302797971e-3),
+            static_cast<RealType>(-6.27420063107527426396e-5),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(8.38688797993971740640e-1),
+            static_cast<RealType>(2.47558526682310722526e-1),
+            static_cast<RealType>(3.03952783355954712472e-2),
+            static_cast<RealType>(1.39226078796010665644e-3),
+            static_cast<RealType>(1.43993679246435688244e-5),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -8) {
+        RealType t = -log2(ldexp(p, 4));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.1196e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-1.16727296241754548410e0),
+            static_cast<RealType>(-1.12325365855062172009e0),
+            static_cast<RealType>(-3.96403456954867129566e-1),
+            static_cast<RealType>(-6.50024588048629862189e-2),
+            static_cast<RealType>(-5.08582387678609504048e-3),
+            static_cast<RealType>(-1.71657051345258316598e-4),
+            static_cast<RealType>(-1.81536405273085024830e-6),
+            static_cast<RealType>(-9.65262938333207656548e-10),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(7.55271574611337871389e-1),
+            static_cast<RealType>(2.16323131117540100488e-1),
+            static_cast<RealType>(2.92693206540519768049e-2),
+            static_cast<RealType>(1.89396907936678571916e-3),
+            static_cast<RealType>(5.20017914327360594265e-5),
+            static_cast<RealType>(4.18896774212993675707e-7),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -16) {
+        RealType t = -log2(ldexp(p, 8));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.0763e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-1.78348038398799868409e0),
+            static_cast<RealType>(-7.74779087785346936524e-1),
+            static_cast<RealType>(-1.27121601027522656374e-1),
+            static_cast<RealType>(-9.86675785835385622362e-3),
+            static_cast<RealType>(-3.69510132425310943600e-4),
+            static_cast<RealType>(-6.00811940375633438805e-6),
+            static_cast<RealType>(-3.06397799506512676163e-8),
+            static_cast<RealType>(-7.34821360521886161256e-12),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(3.76606062137668223823e-1),
+            static_cast<RealType>(5.37821995022686641494e-2),
+            static_cast<RealType>(3.62736078766811383733e-3),
+            static_cast<RealType>(1.16954398984720362997e-4),
+            static_cast<RealType>(1.59917906784160311385e-6),
+            static_cast<RealType>(6.41144889614705503307e-9),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -32) {
+        RealType t = -log2(ldexp(p, 16));
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.9936e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-2.32474749499506229415e0),
+            static_cast<RealType>(-4.81681429397597263092e-1),
+            static_cast<RealType>(-3.79696253130015182335e-2),
+            static_cast<RealType>(-1.42328672650093755545e-3),
+            static_cast<RealType>(-2.58335052925986849305e-5),
+            static_cast<RealType>(-2.03945574260603170161e-7),
+            static_cast<RealType>(-5.04229972664978604816e-10),
+            static_cast<RealType>(-5.49506755992282162712e-14),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(1.87186049570056737301e-1),
+            static_cast<RealType>(1.32852903862611979806e-2),
+            static_cast<RealType>(4.45262195863310928309e-4),
+            static_cast<RealType>(7.13306978839226580931e-6),
+            static_cast<RealType>(4.84555343060572391776e-8),
+            static_cast<RealType>(9.65086092007764297450e-11),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -64) {
+        RealType t = -log2(ldexp(p, 32));
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.2449e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-2.82318656228158372998e0),
+            static_cast<RealType>(-2.84346379198027589453e-1),
+            static_cast<RealType>(-1.09194719815749710073e-2),
+            static_cast<RealType>(-1.99728160102967185378e-4),
+            static_cast<RealType>(-1.77069359938827653381e-6),
+            static_cast<RealType>(-6.82828539186572955883e-9),
+            static_cast<RealType>(-8.22634582905944543176e-12),
+            static_cast<RealType>(-4.10585514777842307175e-16),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(9.29910333991046040738e-2),
+            static_cast<RealType>(3.27860300729204691815e-3),
+            static_cast<RealType>(5.45852206475929614010e-5),
+            static_cast<RealType>(4.34395271645812189497e-7),
+            static_cast<RealType>(1.46600782366946777467e-9),
+            static_cast<RealType>(1.45083131237841500574e-12),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -128) {
+        RealType t = -log2(ldexp(p, 64));
+
+        // Rational Approximation
+        // Maximum Relative Error: 8.6453e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-3.29700011190686231229e0),
+            static_cast<RealType>(-1.62920309130909343601e-1),
+            static_cast<RealType>(-3.07152472866757852259e-3),
+            static_cast<RealType>(-2.75922040607620211449e-5),
+            static_cast<RealType>(-1.20144242264703283024e-7),
+            static_cast<RealType>(-2.27410079849018964454e-10),
+            static_cast<RealType>(-1.34109445298156050256e-13),
+            static_cast<RealType>(-3.08843378675512185582e-18),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(4.62324092774919223927e-2),
+            static_cast<RealType>(8.10410923007867515072e-4),
+            static_cast<RealType>(6.70843016241177926470e-6),
+            static_cast<RealType>(2.65459014339231700938e-8),
+            static_cast<RealType>(4.45531791525831169724e-11),
+            static_cast<RealType>(2.19324401673412172456e-14),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -256) {
+        RealType t = -log2(ldexp(p, 128));
+
+        // Rational Approximation
+        // Maximum Relative Error: 8.2028e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-3.75666995985336008568e0),
+            static_cast<RealType>(-9.15751436135409108392e-2),
+            static_cast<RealType>(-8.51745858385908954959e-4),
+            static_cast<RealType>(-3.77453552696508401182e-6),
+            static_cast<RealType>(-8.10504146884381804474e-9),
+            static_cast<RealType>(-7.55871397276946580837e-12),
+            static_cast<RealType>(-2.19023097542770265117e-15),
+            static_cast<RealType>(-2.34270094396556916060e-20),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(2.30119177073875808729e-2),
+            static_cast<RealType>(2.00787377759037971795e-4),
+            static_cast<RealType>(8.27382543511838001513e-7),
+            static_cast<RealType>(1.62997898759733931959e-9),
+            static_cast<RealType>(1.36215810410261098317e-12),
+            static_cast<RealType>(3.33957268115953023683e-16),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -512) {
+        RealType t = -log2(ldexp(p, 256));
+
+        // Rational Approximation
+        // Maximum Relative Error: 7.8900e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-4.20826069989721597050e0),
+            static_cast<RealType>(-5.07864788729928381957e-2),
+            static_cast<RealType>(-2.33825872475869133650e-4),
+            static_cast<RealType>(-5.12795917403072758309e-7),
+            static_cast<RealType>(-5.44657955194364350768e-10),
+            static_cast<RealType>(-2.51001805474510910538e-13),
+            static_cast<RealType>(-3.58448226638949307172e-17),
+            static_cast<RealType>(-1.79092368272097571876e-22),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(1.14671758705641048135e-2),
+            static_cast<RealType>(4.98614103841229871806e-5),
+            static_cast<RealType>(1.02397186002860292625e-7),
+            static_cast<RealType>(1.00544286633906421384e-10),
+            static_cast<RealType>(4.18843275058038084849e-14),
+            static_cast<RealType>(5.11960642868907665857e-18),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -1024) {
+        RealType t = -log2(ldexp(p, 512));
+
+        // Rational Approximation
+        // Maximum Relative Error: 7.6777e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-4.65527239540648658214e0),
+            static_cast<RealType>(-2.78834161568280967534e-2),
+            static_cast<RealType>(-6.37014695368461940922e-5),
+            static_cast<RealType>(-6.92971221299243529202e-8),
+            static_cast<RealType>(-3.64900562915285147191e-11),
+            static_cast<RealType>(-8.32868843440595945586e-15),
+            static_cast<RealType>(-5.87602374631705229119e-19),
+            static_cast<RealType>(-1.37812578498484605190e-24),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(5.72000087046224585566e-3),
+            static_cast<RealType>(1.24068329655043560901e-5),
+            static_cast<RealType>(1.27105410419102416943e-8),
+            static_cast<RealType>(6.22649556008196699310e-12),
+            static_cast<RealType>(1.29416254332222127404e-15),
+            static_cast<RealType>(7.89365027125866583275e-20),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        result = -boost::math::numeric_limits<RealType>::infinity();
+    }
+
+    return result;
+}
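From the ilogb(p) >= -8 rung downward, the ladder above is self-similar: the branch guarded by ilogb(p) >= -2k is entered with p in [2^-2k, 2^-k), and substitutes

```latex
t = -\log_2\!\left(2^{k} p\right), \qquad
p \in \left[2^{-2k},\, 2^{-k}\right) \;\Longrightarrow\; t \in (0,\, k]
```

so each rational approximant only ever sees a bounded argument, however deep into the lower tail p lies; once p drops below 2^-1024 the chain ends and the function returns minus infinity.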
+            static_cast<RealType>(-5.44657955194364350768e-10),
+            static_cast<RealType>(-2.51001805474510910538e-13),
+            static_cast<RealType>(-3.58448226638949307172e-17),
+            static_cast<RealType>(-1.79092368272097571876e-22),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(1.14671758705641048135e-2),
+            static_cast<RealType>(4.98614103841229871806e-5),
+            static_cast<RealType>(1.02397186002860292625e-7),
+            static_cast<RealType>(1.00544286633906421384e-10),
+            static_cast<RealType>(4.18843275058038084849e-14),
+            static_cast<RealType>(5.11960642868907665857e-18),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -1024) {
+        RealType t = -log2(ldexp(p, 512));
+
+        // Rational Approximation
+        // Maximum Relative Error: 7.6777e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-4.65527239540648658214e0),
+            static_cast<RealType>(-2.78834161568280967534e-2),
+            static_cast<RealType>(-6.37014695368461940922e-5),
+            static_cast<RealType>(-6.92971221299243529202e-8),
+            static_cast<RealType>(-3.64900562915285147191e-11),
+            static_cast<RealType>(-8.32868843440595945586e-15),
+            static_cast<RealType>(-5.87602374631705229119e-19),
+            static_cast<RealType>(-1.37812578498484605190e-24),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(5.72000087046224585566e-3),
+            static_cast<RealType>(1.24068329655043560901e-5),
+            static_cast<RealType>(1.27105410419102416943e-8),
+            static_cast<RealType>(6.22649556008196699310e-12),
+            static_cast<RealType>(1.29416254332222127404e-15),
+            static_cast<RealType>(7.89365027125866583275e-20),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        result = -boost::math::numeric_limits<RealType>::infinity();
+    }
+
+    return result;
+}
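A minimal stand-alone sketch (not part of the patch; names illustrative) of the argument transform the tail branches above share: each branch guarded by ilogb(p) >= -2k rescales p by a power of two and takes -log2, so the fitting variable t stays inside the bounded interval on which the rational minimax fit was computed.

#include <cassert>
#include <cmath>

// Sketch: the branch guarded by ilogb(p) >= -8 uses t = -log2(ldexp(p, 4)).
// For p in [2^-8, 2^-4) this maps t into (0, 4].
int main() {
    for (double p = 1.0 / 256; p < 1.0 / 16; p *= 1.25) {
        double t = -std::log2(std::ldexp(p, 4));
        assert(t > 0.0 && t <= 4.0);
    }
    return 0;
}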
+
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_quantile_lower_imp_prec(const RealType& p, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (p >= 0.375) {
+        RealType t = p - 0.375;
+
+        // Rational Approximation
+        // Maximum Absolute Error: 2.5723e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.74557416577759248536854968412794870581e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.04379368253541440583870397314012269006e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.12622841210720956864564105821904588447e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.57744422491408570970393103737579322242e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.13509711945094517370264490591904074504e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.18322789179144512109337184576079775889e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21447613719864832622177316196592738866e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.49076304733407444404640803736504398642e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.96654951892056950374719952752959986017e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.73083458872938872583408218098970368331e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.22584946471889320670122404162385347867e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.98534922151507267157370682137856253991e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.09159286510191893522643172277831735606e0),
+        };
+        BOOST_MATH_STATIC const RealType Q[12] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.86204686129323171601167115178777357431e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.43698274248278918649234376575855135232e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.75240332521434608696943994815649748669e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.31438891446345558658756610288653829009e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10716029191240549289948990305434475528e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.10878330779477313404660683539265890549e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.52360069933886703736010179403700697679e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.15864312939821257811853678185928982258e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.10341116017481903631605786613604619909e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29121822170912306719250697890270750964e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.56489746112937744052098794310386515793e1),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.25) {
+        RealType t = p - 0.25;
+
+        // Rational Approximation
+        // Maximum Absolute Error: 6.1583e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.17764764050720242897742634974454113395e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.80044093802431965072543552425830082205e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.23613318632011593171919848575560968064e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.77438013844838858458786448973516177604e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.62569530523012138862025718052954558264e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.02005706260864894793795986187582916504e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.29383609355165614630538852833671831839e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.09367754001841471839736367284852087164e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45744413840415901080013900562654222567e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.41920296534143581978760545125050148256e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.94857580745127596732818606388347624241e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.02847586753967876900858299686189155164e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.29953583375818707785500963989580066735e1),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27455303165341271216882778791555788609e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.41762124591820618604790027888328605963e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.30845760165840203715852751405553821601e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.00827370048057599908445731563638383351e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.19621193929561206904250173267823637982e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.10514757798726932158537558200005910184e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.79738493761540403010052092523396617472e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.94101664430520833603032182296078344870e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.31586575577250608890806988616823861649e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.93650751613703379272667745729529916084e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.52472388998113562780767055981852228229e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.01428305018551686265238906201345171425e0),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.125) {
+        RealType t = p - 0.125;
+
+        // Rational Approximation
+        // Maximum Absolute Error: 1.3135e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.77109518013577849065583862782160121458e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.05813204052660740589813216397258899528e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.19628607167020425528944673039894592264e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.67162644860799051148361885190022738759e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.05921446080443979618622123764941760355e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.26685085062411656483492973256809500654e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.17117538916032273474332064444853786788e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.45059470468014721314631799845029715639e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.28952226224720891553119529857430570919e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.98502296814963504284919407719496390478e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.10876326351879104392865586365509749012e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.70358021544406445036220918341411271912e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.49724346845064961378591039928633169443e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.23815021378788622035604969476085727123e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.17262073948257994617723369387261569086e4),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.94901665980514882602824575757494472790e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.54328910175180674300123471690771017388e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.84847502738788846487698327848593567941e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.98451502799612368808473649408471338893e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.13744760159877712051088928513298431905e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.20745061658519699732567732006176366700e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.68622317228909264645937229979147883985e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.96020751551679746882793283955926871655e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.88860541272346724142574740580038834720e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.73454107207588310809238143625482857512e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.23165643368613191971938741926948857263e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.94832163019509140191456686231012184524e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.26616234097287315007047356261933409072e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24686019847093806280148917466062407447e4),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -4) {
+        RealType t = -log2(ldexp(p, 3));
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.0498e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[11] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.77109518013577849065583862782160155093e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.77585398076895266354686007069850894777e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.47522378123968853907102309276280187353e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.63343576432650242131602396758195296288e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.77801189859227220359806456829683498508e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.93221663334563259732178473649683953515e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.95272757466323942599253855146019408376e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.73624853556509653351605530630788087166e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.41317699770351712612969089634227647374e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.34187895701093934279414993393750297714e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.64090928155753225614302094820737249510e-10),
+        };
+        BOOST_MATH_STATIC const RealType Q[11] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62401464973350962823995096121206419019e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11979822811128264831341485706314465894e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.27257342406829987209876262928379300361e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.85505879705365729768944032174855501091e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40983451000610516082352700421098499905e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23459897865681009685618192649929504121e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.28925214684463186484928824536992032740e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.67647262682850294124662856194944728023e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88173142080572819772032615169461689904e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.07756799117728455728056041053803769069e-11),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -8) {
+        RealType t = -log2(ldexp(p, 4));
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.7643e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.16727296241754547290632950718657117630e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.97822895738734630842909028778257589627e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.45580723831325060656664869189975355503e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.13835678647158936819843386298690513648e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.64536831064884519168892017327822018961e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.93786616484143556451247457584976578832e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.55770899078184683328915310751857391073e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.91778173446401005072425460365992356304e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.59064619930808759325013814591048817325e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.54786673836080683521554567617693797315e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.15917340396537949894051711038346411232e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.29633344043292285568750868731529586549e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.27785620133198676852587951604694784533e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.89999814745618370028655821500875451178e-16),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48772690114094395052120751771215809418e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.72281013057830222881716429522080327421e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.67186370229687087768391373818683340542e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.86988148601521223503040043124617333773e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43321724586909919175166704060749343677e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57428821868404424742036582321713763151e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17165774858274087452172407067668213010e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.78674439389954997342198692571336875222e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.82045374895858670592647375231115294575e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.40152058277291349447734231472872126483e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.34789603687129472952627586273206671442e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.38087376350052845654180435966624948994e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34945081364333330292720602508979680233e-16),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -16) {
+        RealType t = -log2(ldexp(p, 8));
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.4987e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.78348038398799867332294266481364810762e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.42913983922316889357725662957488617770e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.02077376277824482097703213549730657663e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.01799479940825547859103232846394236067e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.31954083060883245879038709103320778401e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.14437110578260816704498035546280169833e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.75434713435598124790021625988306358726e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.70722283097111675839403787383067403199e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.22792548204908895458622068271940298849e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.23652632092726261134927067083229843867e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.26848751206698811476021875382152874517e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.96683933776920842966962054618493551480e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.65547426464916480144982028081303670013e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.01788104318587272115031165074724363239e-19),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.43507070588695242714872431565299762416e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.42808541175677232789532731946043918868e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.58154336417481327293949514291626832622e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.52883128062761272825364005132296437324e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.46220303655089035098911370014929809787e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.44776253795594076489612438705019750179e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.09607872267766585503592561222987444825e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.24270418154050297788150584301311027023e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.52138350835458198482199500102799185922e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.28330565098807415367837423320898722351e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.61220858078610415609826514581165467762e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31680570822471881148008283775281806658e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.61638868324981393463928986484698110415e-20),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -32) {
+        RealType t = -log2(ldexp(p, 16));
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.4643e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.32474749499506228416012679106564727824e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.11125026189437033131539969177846635890e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.56906722402983201196890012041528422765e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.56242546565817333757522889497509484980e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.96189353402888611791301502740835972176e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.25518459970705638772495930203869523701e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.23831474024265607073689937590604367113e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.44925744847701733694636991148083680863e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.22891322042392818013643347840386719351e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.72860750698838897533843164259437533533e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.38276123679972197567738586890856461530e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.75010927807240165715236750369730131837e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.39435252454410259267870094713230289131e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.32672767938414655620839066142834241506e-23),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.71913035066927544877255131988977106466e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.07499674325721771035402891723823952963e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.73304002376509252638426379643927595435e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.45986195188119302051678426047947808068e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39631771214004792103186529415117786213e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.82972151546053891838685817022915476363e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.11484161875982352879422494936862579004e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.76886416872139526041488219568768973343e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.88160764330501845206576873052377420740e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.20653899535657202009579871085255085820e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.86752135706343102514753706859178940399e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.08670633989984379551412930443791478495e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96869107941293302786688580824755244599e-24),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -64) {
+        RealType t = -log2(ldexp(p, 32));
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.2783e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.82318656228158372073367735499501003484e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.46261040951642110189344545942990712460e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.64741560190892266676648641695426188913e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.28753551974093682831398870653055328683e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.21312013770915263838500863217194379134e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.52436958859473873340733176333088176566e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.19550238139736009251193868269757013675e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.14964971787780037500173882363122301527e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.40304301938210548254468386306034204388e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.86982233973109416660769999752508002999e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.47229710624085810190563630948355644978e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.97511060097659395674010001155696382091e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.14321784268659603072523892366718901165e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.69804409248161357472540739283978368871e-27),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85763741109198600677877934140774914793e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.51555423561034635648725665049090572375e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.14334282485948451530639961260946534734e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.15303265564789411158928907568898290494e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.33945229806307308687045028827126348382e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.94373901322371782367428404051188999662e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.51420073260465851038482922686870398511e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.39366317896256472225488167609473929757e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32986474655329330922243678847674164814e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.09408217872473269288530036223761068322e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.79051953285476930547217173280519421410e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.94530899348454778842122895096072361105e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36452993460830805591166007621343447892e-28),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -128) {
+        RealType t = -log2(ldexp(p, 64));
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.0123e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.29700011190686230364493911161520668302e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.16175031776740080906111179721128106011e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.33343982195432985864570319341790342784e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.25414682801788504282484273374052405406e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.08812659343240279665150323243172015853e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.33251452861660571881208437468957953698e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.80894766863868081020089830941243893253e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.84955155823472122347227298177346716657e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.98637322645260158088125181176106901234e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.24174383760514163336627039277792172744e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.54369979866464292009398761404242103210e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.02572051048819721089874338860693952304e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.93656169061287808919601714139458074543e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.14930159772574816086864316805656403181e-31),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.27154900915819978649344191118112870943e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.77527908332591966425460814882436207182e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.88097249712649070373643439940164263005e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33593973311650359460519742789132084170e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34383186845963127931313004467487408932e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.18631088001587612168708294926967112654e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.25338215226314856456799568077385137286e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30644713290591280849926388043887647219e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.55508263112797212356530850090635211577e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.96694528841324480583957017533192805939e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.81411886190142822899424539396403206677e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.64683991040772975824276994623053932566e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.81924597500766743545654858597960153152e-32),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -256) {
+        RealType t = -log2(ldexp(p, 128));
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.7624e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.75666995985336007747791649448887723610e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.78960399079208663111712385988217075907e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.74942252057371678208959612011771010491e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.54567765510065203543937772001248399869e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.53093894540157655856029322335609764674e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.83833601054721321664219768559444646069e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.52281007055180941965172296953524749452e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.57322728543196345563534040700366511864e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.52881564741260266060082523971278782893e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.60440334652864372786302383583725866608e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.60691285483339296337794569661545125426e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.29567560587442907936295101146377006338e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.50976593324256906782731237116487284834e-31),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.71308835356954147218854223581309967814e-35),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.62732785401286024270119905692156750540e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.40382961238668912455720345718267045656e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10406445824749289380797744206585266357e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.28896702052362503156922190248503561966e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.16141168910009886089186579048301366151e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41978644147717141591105056152782456952e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.28101353275172857831967521183323237520e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.02996252940600644617348281599332256544e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.91006255647885778937252519693385130907e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.84585864559619959844425689120130028450e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.68573963627097356380969264657086640713e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11059307697054035905630311480256015939e-31),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.36363494270701950295678466437393953964e-36),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -512) {
+        RealType t = -log2(ldexp(p, 256));
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.5621e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.20826069989721596260510558511263035942e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.97440158261228371765435988840257904642e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.03971528248920108158059927256206438162e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.27123766722395421727031536104546382045e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.42341191105097202061646583288627536471e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.27644514375284202188806395834379509517e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.10772944192965679212172315655880689287e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.32875098791800400229370712119075696952e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.06204614360238210805757647764525929969e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.43745006810807466452260414216858795476e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.66712970893511330059273629445122037896e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.72840198778128683137250377883245540424e-30),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.91906782399731224228792112460580813901e-34),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.42769091263044979075875010403899574987e-39),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.31008507886426704374911618340654350029e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.34370110384866123378972324145883460422e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37370811166006065198348108499624387519e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.14880753458828334658200185014547794333e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29049942195929206183214601044522500821e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19814793427532184357255406261941946071e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.53609759199568827596069048758012402352e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.94113467521833827559558236675876398395e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.46066673213431758610437384053309779874e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.73781952388557106045597803110890418919e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.17225106466605017267996611448679124342e-31),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.66384334839761400228111118435077786644e-35),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.02877806111195383689496741738320318348e-40),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -1024) {
+        RealType t = -log2(ldexp(p, 512));
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.4128e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.65527239540648657446629479052874029563e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.49609214609793557370425343404734771058e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.85355312961840000203681352424632999367e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.57243631623079865238801420669247289633e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.64978384343879316016184643597712973486e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.12776782513387823319217102727637716531e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.86985041780323969283076332449881856202e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.11665149267826038417038582618446201377e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.44259259232002496618805591961855219612e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.13186466395317710362065595347401054176e-25),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.72627240737786709568584848420972570566e-29),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.40450635670659803069555960816203368299e-33),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.90550919589933206991152832258558972394e-38),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.33785768143117121220383154455316199086e-43),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15365252334339030944695314405853064901e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.84519641047962864523571386561993045416e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71097850431873211384168229175171958023e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.20268329795802836663630276028274915013e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.00891848515558877833795613956071967566e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.41031229424613259381704686657785733606e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96506914235910190020798805190634423572e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.51190756655665636680121123277286815188e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.82855686000721415124702578998188630945e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.64298533757673219241102013167519737553e-30),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.01176104624443909516274664414542493718e-34),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.41571947895162847564926590304679876888e-39),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.84682590163505511580949151048092123923e-44),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -2048) {
+        RealType t = -log2(ldexp(p, 1024));
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.3064e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.09971143249822249471944441552701756051e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.00154235169065403254826962372636417554e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.76859552294270710004718457715250134998e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.16331901379268792872208226779641113312e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.11590258438815173520561213981966313758e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.17278804462968109983985217400233347654e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.14112976645884560534267524918610371127e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.34652102658577790471066054415469309178e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.85242987373551062800089607781071064493e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.35051904844102317261572436130886083833e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.78478298776769981726834169566536801689e-32),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.22532973433435489030532261530565473605e-37),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.25433604872532935232490414753194993235e-41),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.49792182967344082832448065912949074241e-47),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.76316274347013095030195725596822418859e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45872499993438633169552184478587544165e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13309566903496793786045158442686362533e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99468690853840997883815075627545315449e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24734617022827960185483615293575601906e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.30099852343633243897084627428924039959e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52598626985708878790452436052924637029e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.91432956461466900007096548587800675801e-25),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.54421383015859327468201269268335476713e-29),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.55939743284103455997584863292829252782e-33),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.73331214275752923691778067125447148395e-38),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.55089353084326800338273098565932598679e-42),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.85408276119483460035366338145310798737e-48),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -4096) {
+        RealType t = -log2(ldexp(p, 2048));
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.2337e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.54271778755494231572464179212263718102e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.62737121212473668543011440432166267791e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.10099492629239750693134803100262740506e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.56925359477960645026399648793960646858e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.50756287005636861300081510456668184335e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.40657453971177017986596834420774251809e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.25518001919157628924245515302669097090e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.79618511101781942757791021761865762100e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.70242241511341924787722778791482800736e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.87078860748428154402226644449936091766e-31),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.84256560347986567120140826597805016470e-35),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.71419536977123330712095123316879755172e-40),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.20746149769511232987820552765701234564e-45),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.97544543671003989410397788518265345930e-51),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.87980995989632171985079518382705421728e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.64234691529227024725728122489224211774e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.66149363392892604040036997518509803848e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24365427902918684575287447585802611012e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.88619663977804926166359181945671853793e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.25299846770395565237726328268659386749e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18720305257346902130922082357712771134e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13302092710568005396855019882472656722e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.88571994886818976015465466797965950164e-32),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.48486836614948668196092864992423643733e-36),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72247229252387482782783442901266890088e-41),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.76046592638280288324495546006105696670e-46),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.09378739037162732758860377477607829024e-52),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -8192) {
+        RealType t = -log2(ldexp(p, 4096));
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.1864e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.98493298246627952401490656857159302716e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.76990949843357898517869703626917264559e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.65042794324685841303461715489845834903e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.10605446678026983843303253925148000808e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.02762962429283889606329831562937730874e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.04105882385534634676234513866095562877e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.76001901462155366759952792570076976049e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.48255362964603267691139956218580946011e-25),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.19422119466925125740484046268759113569e-29),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.00719439924828639148906078835399693640e-33),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.89921842231783558951433534621837291030e-38),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.90738353848476619269054038082927243972e-43),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.19893980415902021846066305054394089887e-49),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.85434486590981105149494168639321627061e-55),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43932168599456260558411716919165161381e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.09851997458503734167541584552305867433e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.32285843258966417340522520711168738158e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.76041266755635729156773747720864677283e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21202253509959946958614664659473305613e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28647335562574024550800155417747339700e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.24953333571478743858014647649207040423e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.41206199962423704137133875822618501173e-30),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34018702380092542910629787632780530080e-34),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41732943566503750356718429150708698018e-39),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29626943299239081309470153019011607254e-44),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.13947437500822384369637881437951570653e-50),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.61766557173110449434575883392084129710e-56),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -16384) {
+        RealType t = -log2(ldexp(p, 8192));
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.1568e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.42671464308364892089984144203590292562e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.70165333325375920690660683988390032004e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.51230594210711745541592189387307516997e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.81387249912672866168782835177116953008e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.35308063526816559199325906123032162155e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.67672500455361049516022171111707553191e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.45533142942305626136621399056034449775e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.37422341389432268402917477004312957781e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.32123176403616347106899307416474970831e-31),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.07816837508332884935917946618577512264e-36),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.95418937882563343895280651308376855123e-41),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.78256110112636303941842779721479313701e-47),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.09423327107440352766843873264503717048e-52),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.12386630925835960782702757402676887380e-58),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.19477412398085422408065302795208098500e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.27347517804649548179786994390985841531e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.15042399607786347684366638940822746311e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.84537882580411074097888848210083177973e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.78283331111405789359863743531858801963e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.00711555012725961640684514298170252743e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.21370151454170604715234671414141850094e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.72008007024350635082914256163415892454e-32),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.61182095564217124712889821368695320635e-37),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35498047010165964231841033788823033461e-42),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11828030216193307885831734256233140264e-47),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22499308298315468568520585583666049073e-53),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04877018522402283597555167651619229959e-59),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        result = -boost::math::numeric_limits<RealType>::infinity();
+    }
+
+    return result;
+}
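The two overloads above are selected by a precision tag; a self-contained sketch of that integral_constant-style dispatch (stand-in names, not the patch's actual wrapper):

#include <limits>

// Hypothetical stand-ins for the 53- and 113-bit overloads in this patch.
template <int N> struct prec_tag {};

template <typename RealType>
RealType quantile_lower_imp(const RealType& p, const prec_tag<53>&)  { return p; }
template <typename RealType>
RealType quantile_lower_imp(const RealType& p, const prec_tag<113>&) { return p; }

// Dispatch on significand digits, mirroring how the
// boost::math::integral_constant<int, 53> / <int, 113> overloads are chosen.
template <typename RealType>
RealType quantile_lower(const RealType& p) {
    typedef prec_tag<std::numeric_limits<RealType>::digits <= 53 ? 53 : 113> tag_type;
    return quantile_lower_imp(p, tag_type());
}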
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_quantile_upper_imp_prec(const RealType& p, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (p >= 0.375) {
+        RealType t = p - static_cast<RealType>(0.375);
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.1286e-20
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(1.31348919222343858178e0),
+            static_cast<RealType>(-1.06646675961352786791e0),
+            static_cast<RealType>(-1.80946160022120488884e1),
+            static_cast<RealType>(-1.53457017598330440033e0),
+            static_cast<RealType>(4.71260102173048370028e1),
+            static_cast<RealType>(4.61048467818771410732e0),
+            static_cast<RealType>(-2.80957284947853532418e1),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(4.71007453129016317772e0),
+            static_cast<RealType>(1.31946404969596908872e0),
+            static_cast<RealType>(-1.70321827414586880227e1),
+            static_cast<RealType>(-1.11253495615474018666e1),
+            static_cast<RealType>(1.62659086449959446986e1),
+            static_cast<RealType>(7.37109203295032098763e0),
+            static_cast<RealType>(-2.43898047338699777337e0),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.25) {
+        RealType t = p - static_cast<RealType>(0.25);
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.4934e-18
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(2.55081568282045924981e0),
+            static_cast<RealType>(5.38750533719526696218e0),
+            static_cast<RealType>(-2.32797421725187349036e1),
+            static_cast<RealType>(-3.96043566411306749784e1),
+            static_cast<RealType>(3.80609941977115436545e1),
+            static_cast<RealType>(3.35014421131920266346e1),
+            static_cast<RealType>(-1.17490458743273503838e1),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(7.52439409918350484765e0),
+            static_cast<RealType>(1.34784954182866689668e1),
+            static_cast<RealType>(-9.21002543625052363446e0),
+            static_cast<RealType>(-2.67378141317474265949e1),
+            static_cast<RealType>(2.10158795079902783094e0),
+            static_cast<RealType>(5.90098096212203282798e0),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.125) {
+        RealType t = p - static_cast<RealType>(0.125);
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.0795e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(5.68160868054034111703e0),
+            static_cast<RealType>(1.06098927525586705381e2),
+            static_cast<RealType>(5.74509518025029027944e2),
+            static_cast<RealType>(4.91117375866809056969e2),
+            static_cast<RealType>(-2.92607000654635606895e3),
+            static_cast<RealType>(-3.82912009541683403499e3),
+            static_cast<RealType>(2.49195208452006100935e3),
+            static_cast<RealType>(1.29413301335116683836e3),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(2.69603865809599480308e1),
+            static_cast<RealType>(2.63378422475372461819e2),
+            static_cast<RealType>(1.09903493506098212946e3),
+            static_cast<RealType>(1.60315072092792425370e3),
+            static_cast<RealType>(-5.44710468198458322870e2),
+            static_cast<RealType>(-1.76410218726878681387e3),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -4) {
+        RealType t = -log2(ldexp(p, 3));
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.4618e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(7.10201085067542566037e-1),
+            static_cast<RealType>(6.70042401812679849451e-1),
+            static_cast<RealType>(2.42799404088685074098e-1),
+            static_cast<RealType>(4.80613880364042262227e-2),
+            static_cast<RealType>(6.04473313360581797461e-3),
+            static_cast<RealType>(5.09172911021654842046e-4),
+            static_cast<RealType>(-6.63145317984529265677e-6),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(9.18649629646213969612e-1),
+            static_cast<RealType>(3.66343989541898286306e-1),
+            static_cast<RealType>(8.01010534748206001446e-2),
+            static_cast<RealType>(1.00553335007168823115e-2),
+            static_cast<RealType>(6.30966763237332075752e-4),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p);
+    }
+    else if (ilogb(p) >= -8) {
+        RealType t = -log2(ldexp(p, 4));
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.8994e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(7.06147398566773538296e-1),
+            static_cast<RealType>(4.26802162741800814387e-1),
+            static_cast<RealType>(1.32254436707168800420e-1),
+            static_cast<RealType>(2.86055054496737936396e-2),
+            static_cast<RealType>(3.63373131686703931514e-3),
+            static_cast<RealType>(3.84438945816411937013e-4),
+            static_cast<RealType>(1.67768561420296743529e-5),
+            static_cast<RealType>(8.76982374043363061978e-7),
+            static_cast<RealType>(-1.99744396595921347207e-8),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(6.28190787856605587324e-1),
+            static_cast<RealType>(2.10992746593815791546e-1),
+            static_cast<RealType>(4.44397672327578790713e-2),
+            static_cast<RealType>(6.02768341661155914525e-3),
+            static_cast<RealType>(5.46578619531721658923e-4),
+            static_cast<RealType>(3.11116573895074296750e-5),
+            static_cast<RealType>(1.17729007979018602786e-6),
+            static_cast<RealType>(-2.78441865351376040812e-8),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p);
+    }
+    else if (ilogb(p) >= -16) {
+        RealType t = -log2(ldexp(p, 8));
+
+        // Rational Approximation
+        // Maximum Relative Error: 8.8685e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(6.48209596014908359251e-1),
+            static_cast<RealType>(2.52611824671691390768e-1),
+            static_cast<RealType>(4.65114070477803399291e-2),
+            static_cast<RealType>(5.23373513313686849909e-3),
+            static_cast<RealType>(3.83113384161076881958e-4),
+            static_cast<RealType>(1.96230077517629530809e-5),
+            static_cast<RealType>(5.83117485120890819338e-7),
+            static_cast<RealType>(6.92614450423703079737e-9),
+            static_cast<RealType>(-3.89531123166658723619e-10),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(3.99413988076189200840e-1),
+            static_cast<RealType>(7.32068638518417765776e-2),
+            static_cast<RealType>(8.15517102642752348889e-3),
+            static_cast<RealType>(6.09126071418098074914e-4),
+            static_cast<RealType>(3.03794079468789962611e-5),
+            static_cast<RealType>(9.32109079205017197662e-7),
+            static_cast<RealType>(1.05435710482490499583e-8),
+            static_cast<RealType>(-6.08748435983193979360e-10),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p);
+    }
+    else if (ilogb(p) >= -32) {
+        RealType t = -log2(ldexp(p, 16));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.0253e-17
+        BOOST_MATH_STATIC const RealType P[10] = {
+            static_cast<RealType>(6.36719010559816164896e-1),
+            static_cast<RealType>(2.06504115804034148753e-1),
+            static_cast<RealType>(3.28085429275407182582e-2),
+            static_cast<RealType>(3.31676417519020335859e-3),
+            static_cast<RealType>(2.35502578757551086372e-4),
+            static_cast<RealType>(1.21652240566662139418e-5),
+            static_cast<RealType>(4.57039495420392748658e-7),
+            static_cast<RealType>(1.18090959236399583940e-8),
+            static_cast<RealType>(1.77492646969597480221e-10),
+            static_cast<RealType>(-2.19331267300885448673e-17),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(3.24422807416528490276e-1),
+            static_cast<RealType>(5.15290129833049138552e-2),
+            static_cast<RealType>(5.21051235888272287209e-3),
+            static_cast<RealType>(3.69895399249472399625e-4),
+            static_cast<RealType>(1.91103139437893226482e-5),
+            static_cast<RealType>(7.17882574725373091636e-7),
+            static_cast<RealType>(1.85502934977316481559e-8),
+            static_cast<RealType>(2.78798057565507249164e-10),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p);
+    }
+    else if (ilogb(p) >= -64) {
+        RealType t = -log2(ldexp(p, 32));
+
+        // Rational Approximation
+        // Maximum Relative Error: 8.1705e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(6.36619775525705206992e-1),
+            static_cast<RealType>(2.68335698140634792041e-1),
+            static_cast<RealType>(5.49803347535070103650e-2),
+            static_cast<RealType>(7.25018344556356907109e-3),
+            static_cast<RealType>(6.87753481255849254220e-4),
+            static_cast<RealType>(4.86155006277788340253e-5),
+            static_cast<RealType>(2.84604768310787862450e-6),
+            static_cast<RealType>(9.56133960810049319917e-8),
+            static_cast<RealType>(5.26850116571886385248e-9),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(4.21500730173440590900e-1),
+            static_cast<RealType>(8.63629077498258325752e-2),
+            static_cast<RealType>(1.13885615328098640032e-2),
+            static_cast<RealType>(1.08032064178130906887e-3),
+            static_cast<RealType>(7.63650498196064792408e-5),
+            static_cast<RealType>(4.47056124637379045275e-6),
+            static_cast<RealType>(1.50189171357721423127e-7),
+            static_cast<RealType>(8.27574227882033707932e-9),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p);
+    }
+    else {
+        result = 2 / (constants::pi<RealType>() * p);
+    }
+
+    return result;
+}
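Each branch above is a rational minimax fit P(t)/Q(t) evaluated by tools::evaluate_polynomial, which takes its coefficients in ascending order; a minimal stand-alone equivalent for readers without Boost at hand (illustrative only, not the library's implementation):

#include <cstddef>

// Horner evaluation with ascending-order coefficients, matching the
// convention of boost::math::tools::evaluate_polynomial used above.
template <typename Real, std::size_t N>
Real horner(const Real (&c)[N], Real t) {
    Real r = c[N - 1];
    for (std::size_t i = N - 1; i > 0; --i)
        r = r * t + c[i - 1];
    return r;
}

// A branch's result is then horner(P, t) / horner(Q, t); the far-tail
// branches divide once more by p, consistent with the closing
// 2 / (pi * p) asymptote.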
+
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_quantile_upper_imp_prec(const RealType& p, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (p >= 0.4375) {
+        RealType t = p - 0.4375;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.4465e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[11] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.08338732735341567163440035550389989556e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.27245731792290848390848202647311435023e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.29317169036386848462079766136373749420e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36342136825575317326816540539659955416e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.31108700679715257074164180252148868348e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.81863611749256385875333154189074054367e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.11618233433781722149749739225688743102e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.45241854625686954669050322459035410227e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.09780430233523239228350030812868983054e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.42232005306623465126477816911649683789e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.24816048952817367950452675590290535540e0),
+        };
+        BOOST_MATH_STATIC const RealType Q[10] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.80464069267458650284548842830642770344e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.28240205449280944407125436342013240876e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.94145088402407692372903806765594642452e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30062294376971843436236253827463203953e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.47118047660686070998671803800237836970e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.00643263133479482753298910520340235765e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.79460803824650509439313928266686172255e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.32647058691746306769699006355256099134e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.59208938705683333141038012302171324544e0),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.375) {
+        RealType t = p - 0.375;
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.1929e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[11] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31348919222343858173602105619413801018e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.02800226274700443079521563669609776285e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.02091675505570786434803291987263553778e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50141943970885120432710080552941486001e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.93099903417013423125762526465625227789e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.56412922160141953385088141936082249641e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.47026602535072645589119440784669747242e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.01068960815396205074336853052832780888e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.86591619131639705495877493344047777421e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.26390836417639942474165178280649450755e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.18212484486162942333407102351878915285e0),
+        };
+        BOOST_MATH_STATIC const RealType Q[10] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.97802777458574322604171035748634755981e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.33277809211107726455308655998819166901e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.76555481647551088626503871996617234475e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.33146828123660043197526014404644087069e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.65159900182434446550785415837526228592e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.32391192521438191878041140980983374411e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.12112886240590711980064990996002999330e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.93964809733838306198746831833843897743e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.53948309965401603055162465663290204205e1),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.25) {
+        RealType t = p - 0.25;
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.2765e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.55081568282045925871949387822806890848e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21080883686702131458668798583937913025e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.15083151599213113740932148510289036342e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.94190629930345397070104862391009053509e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.40768205403470729468297576291723141480e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.00001008242667338579153437084294876585e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70900785394455368299616221471466320407e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.48947677419760753410122194475234527150e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.01826174001050912355357867446431955195e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.55833657916143927452986099130671173511e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.32953617526068647169047596631564287934e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.32825234826729794599233825734928884074e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.47352171888649528242284500266830013906e1),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40793887011403443604922082103267036101e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.04348824299115035210088417095305744248e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19680004238557953382868629429538716069e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.31172263627566980203163658640597441741e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.07390429662527773449936608284938592773e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.94877589960261706923147291496752293313e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.94903802003585398809229608695623474341e2),
4.80417437710146805538675929521229778181e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.23364098614130091185959973343748897970e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.12975537807357019330268041620753617442e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.36592279898578127130605391750428961301e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18495624730372864715421146607185990918e1), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (p >= 0.125) { + RealType t = p - 0.125; + + // Rational Approximation + // Maximum Relative Error: 1.8007e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.68160868054034088524891526884683014057e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85165791469635551063850795991424359350e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.42938802867742165917839659578485422534e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59273512668331194186228996665355137458e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.91680503091725091370507732042764517726e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85642348415580865994863513727308578556e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.90181935466760294413877600892013910183e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.89141276256233344773677083034724024215e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.00250514074918631367419468760920281159e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28168216451109123143492880695546179794e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.14996399533648172721538646235459709807e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.58122093722347315498230864294015130011e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.25168985723506298009849577846542992545e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.01179759985059408785527092464505889999e5), + }; + BOOST_MATH_STATIC const RealType Q[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.08766677593618443545489115711858395831e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.05163374816838964338807027995515659842e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.62582103160439981904537982068579322820e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.62170991799612186300694554812291085206e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11013837158432827711075385018851760313e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45458895395245243570930804678601511371e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.08336489932795411216528182314354971403e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.11314692423102333551299419575616734987e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.43287683964711678082430107025218057096e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.62052814931825182298493472041247278475e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.91440920656902450957296030252809476245e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.54913345383745613446952578605023052270e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.76034827722473399290702590414091767416e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.94027684838690965214346010602354223752e3), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (ilogb(p) >= -4) { + RealType t = -log2(ldexp(p, 3)); + + // Rational Approximation + // Maximum Relative Error: 6.1905e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
7.10201085067542610656114408605853786551e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.04725580445598482170291458376577106746e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.35945839005443673797792325217359695272e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.15894004364989372373490772246381545906e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.54550169514753150042231386414687368032e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50389998399729913427837945242228928632e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75018554725308784191307050896936055909e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.95901705695908219804887362154169268380e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.34386856794684798098717884587473860604e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.89025399683852111061217430321882178699e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19044156703773954109232310846984749672e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11932910013840927659486142481532276176e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.64064398716881126082770692219937093427e-10), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24909572944428286558287313527068259394e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.70912720447370835699164559729287157119e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.21998644852982625437008410769048682388e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.95906385698373052547496572397097325447e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35344144061390771459100718852878517200e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.34168669072527413734185948498168454149e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.24488907049996230177518311480230131257e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.92059624838630990024209986717533470508e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.84464614954263838504154559314144088371e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.67874815200287308180777775077428545024e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.65919857481420519138294080418011981524e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.31466713452016682217190521435479677133e-10), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p); + } + else if (ilogb(p) >= -8) { + RealType t = -log2(ldexp(p, 4)); + + // Rational Approximation + // Maximum Relative Error: 8.5157e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.06147398566773479301585022897491054494e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06137881154706023038556659418303323027e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.00274868819366386235164897614448662308e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.03481313941011533876096564688041226638e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50172569438851062169493372974287427240e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.33370725278950299189434839636002761850e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.97566905908106543054773229070602272718e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85701973515993932384374087677862623215e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.81956143385351702288398705969037130205e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.49975572102999645354655667945479202048e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.54665400959860442558683245665801873530e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
2.94292402413454232307556797758030774716e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.98038791388715925556623187510676330309e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11242951548709169234296005470944661995e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.92636379295018831848234711132457626676e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.77389296072621088586880199705598178518e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.57808410784300002747916947756919004207e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.93860773322862111592582321183379587624e-16), + }; + BOOST_MATH_STATIC const RealType Q[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52683694883265337797012770275040297516e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17837082293165509684677505408307814500e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.06195236296471366891670923430225774487e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29459155224640682509948954218044556307e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.71350726081102446771887145938865551618e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.55986063168260695680927535587363081713e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.91996892322204645930710038043021675160e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.43907073162091303683795779882887569537e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.50034830055055263363497137448887884379e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.56615898355501904078935686679056442496e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.61099855362387625880067378834775577974e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.12940315230564635808566630258463831421e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.73572881409271303264226007333510301220e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.77786420070246087920941454352749186288e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.77914406265766625938477137082940482898e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.19708585422668069396821478975324123588e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40406059898292960948942525697075698413e-15), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p); + } + else if (ilogb(p) >= -16) { + RealType t = -log2(ldexp(p, 8)); + + // Rational Approximation + // Maximum Relative Error: 7.6812e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.48209596014908270566135466727658374314e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.02026332003132864886056710532156370366e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.68941634461905013212266453851941196774e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.61650792370551069313309111250434438540e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52611930219013953260661961529732777539e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29488123972430683478601278003510200360e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.68806175827491046693183596144172426378e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.51806782259569842628995584152985951836e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92353868262961486571527005289554589652e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21494769586031703137329731447673056499e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.39837421784601055804920937629607771973e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.82216155524308827738242486229625170158e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
1.04275785296896148301798836366902456306e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.19999929939765873468528448012634122362e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.03583326787146398902262502660879425573e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59755092249701477917281379650537907903e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.32583227076029470589713734885690555562e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.18237323554153660947807202150429686004e-20), + }; + BOOST_MATH_STATIC const RealType Q[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.38459552164692902984228821988876295376e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.21584899508575302641780901222203752951e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19656836695824518143414401720590693544e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.39821085943818944882332778361549212756e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.60484296824768079700823824408428524933e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.22173385695010329771921985088956556771e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.95541259523416810836752584764202086573e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.02184255281138028802991551275755427743e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.90825805251143907045903671893185297007e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00501277755608081163250456177637280682e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.43387099521800224735155351696799358451e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63751106922299101655071906417624415019e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.02796982349519589339629488980132546290e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26197249278457937947269910907701176956e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.50981935956236238709523457678017928506e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.65309560070040982176772709693008187384e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.42817828965851841104270899392956866435e-20), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p); + } + else if (ilogb(p) >= -32) { + RealType t = -log2(ldexp(p, 16)); + + // Rational Approximation + // Maximum Relative Error: 2.8388e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36719010559816175149447242695581604280e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.14714772485724956396126176973339095223e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.47792450677638612907408723539943311437e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14084804538576805298420530820092167411e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.25784891219227004394312050838763762669e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06837168825575413225975778906503529455e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.26908306638706189702624634771158355088e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.06396335535135452379658152785541731746e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.89854018431899039966628599727721422261e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.48974049316978526855972339306215972434e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.50886538662952684349385729585856778829e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.14095970401472469264258565259303801322e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71915162586912203234023473966563445362e-14), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.46099196574734038609354417874908346873e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.69944075002490023348175340827135133316e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.37340205792165863440617831987825515203e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.87812199530402923085142356622707924805e-22), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.76810877067601573471489978907720495511e-24), + }; + BOOST_MATH_STATIC const RealType Q[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.94373217074550329856398644558576545146e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17462725343185049507839058445338783693e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.79202779096887355136298419604918306868e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.97583473532621831662838256679872014292e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.67819154370257505016693473230060726722e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14182379349642191946237975301363902175e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.38367234191828732305257162934647076311e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.98221340505887984555143894024281550376e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17648765147609962405833802498013198305e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.94091261341172666220769613477202626517e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12169979717068598708585414568018667622e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.70043694579268983742161305612636042906e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.43651270200498902307944806310116446583e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.95266223996097470768947426604723764300e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15821111112681530432702452073811996961e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08041298058041360645934320138765284054e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.91893114159827950553463154758337724676e-24), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p); + } + else if (ilogb(p) >= -64) { + RealType t = -log2(ldexp(p, 32)); + + // Rational Approximation + // Maximum Relative Error: 1.8746e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[19] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36619775525705288697351261475419832625e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29882145587771350744255724773409752285e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.07952726597277085327360888304737411175e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72928496414816922167597110591366081416e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.01641163277458693633771532254570177776e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65627339211110756774878685166318417370e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41953343652571732907631074381749818724e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.27682202874503433884090203197149318368e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.33177176779158737868498722222027162030e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.34485618544363735547395633416797591537e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96996761199233617188435782568975757378e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.49631247632674130553464740647053162499e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.68090516971007163491968659797593218680e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
1.39910262557283449853923535586722968539e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83704888007521886644896435914745476741e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.87884425419276681417666064027484555860e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36600092466902449189685791563990733005e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.53604301472332155307661986064796109517e-26), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.65664588229982894587678197374867153136e-40), + }; + BOOST_MATH_STATIC const RealType Q[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.61098031370834273919229478584740981117e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.40810642301361416278392589243623940154e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.42874346984605660407576451987840217534e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.30896462654364903689199648803900475405e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.17246449391141576955714059812811712587e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22979790521806964047777145482613709395e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.85960899519336488582042102184331670230e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.66273852553220863665584472398487539899e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15372731217983084923067673501176233172e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.09441788794783860366430915309857085224e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.06279112323261126652767146380404236150e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36359339522621405197747209968637035618e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19770526521305519813109395521868217810e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.88562963284557433336083678206625018948e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.95128165317597657325539450957778690578e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.14570923483883184645242764315877865073e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.69599603258626408321886443187629340033e-26), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p); + } + else if (ilogb(p) >= -128) { + RealType t = -log2(ldexp(p, 64)); + + // Rational Approximation + // Maximum Relative Error: 3.9915e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36619772367581344576326594951209529606e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72456363182667891167613558295097711432e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.74486567435450138741058930951301644059e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.94624522781897679952110594449134468564e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.09848623985771449914778668831103210333e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.47493285141689711937343304940229517457e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.22134975575390048261922652492143139174e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30240148387764167235466713023950979069e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06917824188001432265980161955665997666e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.27834220508404489112697949450988070802e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48663447630051388468872352628795428134e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.50514504588736921389704370029090421684e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18965303814265217659151418619980209487e-14), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74200654214326267651127117044008493519e-16),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.34060054352573532839373386456991657111e-18),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.34240516843783954067548886404044879120e-20),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07803703545135964499326712080667886449e-21),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.61500479431085205124031101160332446432e-23),
+        };
+        BOOST_MATH_STATIC const RealType Q[18] = {
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.27973454499231032893774072677004977154e-1),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.02401389920613749641292661572240166038e-2),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24819328156695252221821935845914708591e-2),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27210724381675120281861717194783977895e-3),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01708007392492681238863778030115281961e-4),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.63088069045476088736355784718397594807e-6),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.61660379368211892821215228891806384883e-7),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.67946125503415200067055797463173521598e-8),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.72040422051759599096448422858046040086e-10),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.33519997465950200122152159780364149268e-11),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.07666528975810553712124845533861745455e-13),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.86870262247486708096341722190198527508e-14),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.30713380444621290817686989936029997572e-16),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.38899571664905345700275460272815357978e-18),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46750157220118157937510816924752429685e-19),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.69337661543585547694652989893297703060e-21),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.53684359865963395505791671817598669527e-23),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p);
+    }
+    else {
+        result = 2 / (constants::pi<RealType>() * p);
+    }
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_quantile_imp_prec(const RealType& p, bool complement, const boost::math::integral_constant<int, 53>& tag)
+{
+    if (p > 0.5)
+    {
+        return !complement ? landau_quantile_upper_imp_prec(1 - p, tag) : landau_quantile_lower_imp_prec(1 - p, tag);
+    }
+
+    return complement ? landau_quantile_upper_imp_prec(p, tag) : landau_quantile_lower_imp_prec(p, tag);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_quantile_imp_prec(const RealType& p, bool complement, const boost::math::integral_constant<int, 113>& tag)
+{
+    if (p > 0.5)
+    {
+        return !complement ? landau_quantile_upper_imp_prec(1 - p, tag) : landau_quantile_lower_imp_prec(1 - p, tag);
+    }
+
+    return complement ? landau_quantile_upper_imp_prec(p, tag) : landau_quantile_lower_imp_prec(p, tag);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType landau_quantile_imp(const landau_distribution<RealType, Policy>& dist, const RealType& p, bool complement)
+{
+    // This routine implements the quantile for the Landau distribution,
+    // the value p may be the probability, or its complement if complement=true.
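+    //
+    // (Editorial note) The precision of RealType selects either the 53-bit or
+    // the 113-bit rational approximation above via the integral_constant tag
+    // computed below, and probabilities with p > 0.5 are routed through the
+    // opposite tail, so the approximations are only ever evaluated on [0, 0.5].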
+
+    constexpr auto function = "boost::math::quantile(landau<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+    RealType location = dist.location();
+    RealType bias = dist.bias();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_probability(function, p, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Landau distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    result = location + scale * (landau_quantile_imp_prec(p, complement, tag_type()) - bias);
+
+    return result;
+}
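+
+// A minimal usage sketch (editorial note, not part of the library): the
+// dispatcher above is what the public quantile() overloads further down in
+// this header ultimately call, e.g.
+//
+//     boost::math::landau dist;                                               // location 0, scale 1
+//     double q  = boost::math::quantile(dist, 0.75);                          // lower-tail quantile
+//     double qc = boost::math::quantile(boost::math::complement(dist, 0.25)); // same point via the upper tail
+//
+// q and qc agree up to rounding, since the complemented form evaluates the
+// upper tail directly instead of computing 1 - p.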
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_mode_imp_prec(const boost::math::integral_constant<int, 53>&)
+{
+    return static_cast<RealType>(-0.42931452986133525017);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_mode_imp_prec(const boost::math::integral_constant<int, 113>&)
+{
+    return BOOST_MATH_BIG_CONSTANT(RealType, 113, -0.42931452986133525016556463510885028346);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType landau_mode_imp(const landau_distribution<RealType, Policy>& dist)
+{
+    // This implements the mode for the Landau distribution.
+
+    constexpr auto function = "boost::math::mode(landau<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+    RealType location = dist.location();
+    RealType bias = dist.bias();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Landau distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    result = location + scale * (landau_mode_imp_prec<RealType>(tag_type()) - bias);
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_median_imp_prec(const boost::math::integral_constant<int, 53>&)
+{
+    return static_cast<RealType>(0.57563014394507821440);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_median_imp_prec(const boost::math::integral_constant<int, 113>&)
+{
+    return BOOST_MATH_BIG_CONSTANT(RealType, 113, 0.57563014394507821439627930892257517269);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType landau_median_imp(const landau_distribution<RealType, Policy>& dist)
+{
+    // This implements the median for the Landau distribution.
+
+    constexpr auto function = "boost::math::median(landau<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+    RealType location = dist.location();
+    RealType bias = dist.bias();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Landau distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    result = location + scale * (landau_median_imp_prec<RealType>(tag_type()) - bias);
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_entropy_imp_prec(const boost::math::integral_constant<int, 53>&)
+{
+    return static_cast<RealType>(2.37263644000448182448);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_entropy_imp_prec(const boost::math::integral_constant<int, 113>&)
+{
+    return BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.3726364400044818244844049010588577710);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType landau_entropy_imp(const landau_distribution<RealType, Policy>& dist)
+{
+    // This implements the entropy for the Landau distribution.
+
+    constexpr auto function = "boost::math::entropy(landau<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Landau distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    result = landau_entropy_imp_prec<RealType>(tag_type()) + log(scale);
+
+    return result;
+}
+
+} // detail
+
+template <class RealType = double, class Policy = policies::policy<> >
+class landau_distribution
+{
+    public:
+    typedef RealType value_type;
+    typedef Policy policy_type;
+
+    BOOST_MATH_GPU_ENABLED landau_distribution(RealType l_location = 0, RealType l_scale = 1)
+        : mu(l_location), c(l_scale)
+    {
+        BOOST_MATH_STD_USING
+
+        constexpr auto function = "boost::math::landau_distribution<%1%>::landau_distribution";
+        RealType result = 0;
+        detail::check_location(function, l_location, &result, Policy());
+        detail::check_scale(function, l_scale, &result, Policy());
+
+        location_bias = -2 / constants::pi<RealType>() * log(l_scale);
+    } // landau_distribution
+
+    BOOST_MATH_GPU_ENABLED RealType location()const
+    {
+        return mu;
+    }
+    BOOST_MATH_GPU_ENABLED RealType scale()const
+    {
+        return c;
+    }
+    BOOST_MATH_GPU_ENABLED RealType bias()const
+    {
+        return location_bias;
+    }
+
+    private:
+    RealType mu;            // The location parameter.
+    RealType c;             // The scale parameter.
+    RealType location_bias; // = -2 / pi * log(c)
+};
+
+typedef landau_distribution<double> landau;
+
+#ifdef __cpp_deduction_guides
+template <class RealType>
+landau_distribution(RealType) -> landau_distribution<typename boost::math::tools::promote_args<RealType>::type>;
+template <class RealType>
+landau_distribution(RealType, RealType) -> landau_distribution<typename boost::math::tools::promote_args<RealType>::type>;
+#endif
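+
+// Editorial note: location_bias stores -2 / pi * log(c). The quantile, mode and
+// median implementations above return location + scale * (standard_value - bias),
+// i.e. the scale parameter c also shifts the distribution by (2 c / pi) * log(c);
+// this appears to follow the usual Landau convention, under which c * X is not
+// simply a rescaled copy of X but picks up a logarithmic shift in location.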
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const landau_distribution<RealType, Policy>&)
+{ // Range of permissible values for random variable x.
+    BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
+    {
+        return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
+    }
+    else
+    { // Can only use max_value.
+        using boost::math::tools::max_value;
+        return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max.
+    }
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const landau_distribution<RealType, Policy>&)
+{ // Range of supported values for random variable x.
+    // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
+    BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
+    {
+        return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
+    }
+    else
+    { // Can only use max_value.
+        using boost::math::tools::max_value;
+        return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max.
+    }
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const landau_distribution<RealType, Policy>& dist, const RealType& x)
+{
+    return detail::landau_pdf_imp(dist, x);
+} // pdf
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const landau_distribution<RealType, Policy>& dist, const RealType& x)
+{
+    return detail::landau_cdf_imp(dist, x, false);
+} // cdf
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const landau_distribution<RealType, Policy>& dist, const RealType& p)
+{
+    return detail::landau_quantile_imp(dist, p, false);
+} // quantile
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<landau_distribution<RealType, Policy>, RealType>& c)
+{
+    return detail::landau_cdf_imp(c.dist, c.param, true);
+} // cdf complement
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<landau_distribution<RealType, Policy>, RealType>& c)
+{
+    return detail::landau_quantile_imp(c.dist, c.param, true);
+} // quantile complement
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mean(const landau_distribution<RealType, Policy>&)
+{ // There is no mean:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The Landau Distribution has no mean");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::mean(landau<%1%>&)",
+        "The Landau distribution does not have a mean: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType variance(const landau_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no variance:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The Landau Distribution has no variance");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::variance(landau<%1%>&)",
+        "The Landau distribution does not have a variance: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mode(const landau_distribution<RealType, Policy>& dist)
+{
+    return detail::landau_mode_imp(dist);
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType median(const landau_distribution<RealType, Policy>& dist)
+{
+    return detail::landau_median_imp(dist);
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const landau_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no skewness:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The Landau Distribution has no skewness");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::skewness(landau<%1%>&)",
+        "The Landau distribution does not have a skewness: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy()); // infinity?
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const landau_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no kurtosis:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The Landau Distribution has no kurtosis");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::kurtosis(landau<%1%>&)",
+        "The Landau distribution does not have a kurtosis: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const landau_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no kurtosis excess:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The Landau Distribution has no kurtosis excess");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::kurtosis_excess(landau<%1%>&)",
+        "The Landau distribution does not have a kurtosis excess: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType entropy(const landau_distribution<RealType, Policy>& dist)
+{
+    return detail::landau_entropy_imp(dist);
+}
+
+}} // namespaces
+
+
+#endif // BOOST_STATS_LANDAU_HPP
diff --git a/include/boost/math/distributions/laplace.hpp b/include/boost/math/distributions/laplace.hpp
index 81ae8fed9d..81a0abe1ab 100644
--- a/include/boost/math/distributions/laplace.hpp
+++ b/include/boost/math/distributions/laplace.hpp
@@ -1,6 +1,7 @@
 // Copyright Thijs van den Berg, 2008.
 // Copyright John Maddock 2008.
 // Copyright Paul A. Bristow 2008, 2014.
+// Copyright Matt Borland 2024.
 
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0. (See accompanying file
@@ -17,11 +18,15 @@
 #ifndef BOOST_STATS_LAPLACE_HPP
 #define BOOST_STATS_LAPLACE_HPP
 
+#include
+#include
+#include
 #include
 #include
 #include
 #include
-#include
+#include
+#include
 
 namespace boost{ namespace math{
@@ -43,7 +48,7 @@ class laplace_distribution
    // ----------------------------------
    // Constructor(s)
    // ----------------------------------
-   explicit laplace_distribution(RealType l_location = 0, RealType l_scale = 1)
+   BOOST_MATH_GPU_ENABLED explicit laplace_distribution(RealType l_location = 0, RealType l_scale = 1)
       : m_location(l_location), m_scale(l_scale)
    {
       RealType result;
@@ -55,17 +60,17 @@ class laplace_distribution
    // Public functions
    // ----------------------------------
 
-   RealType location() const
+   BOOST_MATH_GPU_ENABLED RealType location() const
    {
       return m_location;
    }
 
-   RealType scale() const
+   BOOST_MATH_GPU_ENABLED RealType scale() const
    {
       return m_scale;
    }
 
-   bool check_parameters(const char* function, RealType* result) const
+   BOOST_MATH_GPU_ENABLED bool check_parameters(const char* function, RealType* result) const
    {
       if(false == detail::check_scale(function, m_scale, result, Policy())) return false;
       if(false == detail::check_location(function, m_location, result, Policy())) return false;
@@ -91,42 +96,42 @@ laplace_distribution(RealType,RealType)->laplace_distribution<typename boost::math::tools::promote_args<RealType>::type>;
 
 template <class RealType, class Policy>
-inline std::pair<RealType, RealType> range(const laplace_distribution<RealType, Policy>&)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> range(const laplace_distribution<RealType, Policy>&)
 {
-   if (std::numeric_limits<RealType>::has_infinity)
+   BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
    { // Can use infinity.
-     return std::pair<RealType, RealType>(-std::numeric_limits<RealType>::infinity(), std::numeric_limits<RealType>::infinity()); // - to + infinity.
+ return boost::math::pair(-boost::math::numeric_limits::infinity(), boost::math::numeric_limits::infinity()); // - to + infinity. } else { // Can only use max_value. using boost::math::tools::max_value; - return std::pair(-max_value(), max_value()); // - to + max value. + return boost::math::pair(-max_value(), max_value()); // - to + max value. } } template -inline std::pair support(const laplace_distribution&) +BOOST_MATH_GPU_ENABLED inline boost::math::pair support(const laplace_distribution&) { - if (std::numeric_limits::has_infinity) + BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits::has_infinity) { // Can Use infinity. - return std::pair(-std::numeric_limits::infinity(), std::numeric_limits::infinity()); // - to + infinity. + return boost::math::pair(-boost::math::numeric_limits::infinity(), boost::math::numeric_limits::infinity()); // - to + infinity. } else { // Can only use max_value. using boost::math::tools::max_value; - return std::pair(-max_value(), max_value()); // - to + max value. + return boost::math::pair(-max_value(), max_value()); // - to + max value. } } template -inline RealType pdf(const laplace_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType pdf(const laplace_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions // Checking function argument RealType result = 0; - const char* function = "boost::math::pdf(const laplace_distribution<%1%>&, %1%))"; + constexpr auto function = "boost::math::pdf(const laplace_distribution<%1%>&, %1%))"; // Check scale and location. if (false == dist.check_parameters(function, &result)) return result; @@ -152,13 +157,13 @@ inline RealType pdf(const laplace_distribution& dist, const Re } // pdf template -inline RealType logpdf(const laplace_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType logpdf(const laplace_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions // Checking function argument - RealType result = -std::numeric_limits::infinity(); - const char* function = "boost::math::logpdf(const laplace_distribution<%1%>&, %1%))"; + RealType result = -boost::math::numeric_limits::infinity(); + constexpr auto function = "boost::math::logpdf(const laplace_distribution<%1%>&, %1%))"; // Check scale and location. if (false == dist.check_parameters(function, &result)) @@ -178,8 +183,8 @@ inline RealType logpdf(const laplace_distribution& dist, const const RealType mu = dist.scale(); const RealType b = dist.location(); - // if b is 0 avoid divde by 0 error - if(abs(b) < std::numeric_limits::epsilon()) + // if b is 0 avoid divide by 0 error + if(abs(b) < boost::math::numeric_limits::epsilon()) { result = log(pdf(dist, x)); } @@ -194,13 +199,13 @@ inline RealType logpdf(const laplace_distribution& dist, const } // logpdf template -inline RealType cdf(const laplace_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const laplace_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // For ADL of std functions. RealType result = 0; // Checking function argument. - const char* function = "boost::math::cdf(const laplace_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const laplace_distribution<%1%>&, %1%)"; // Check scale and location. 
if (false == dist.check_parameters(function, &result)) return result; @@ -228,13 +233,13 @@ inline RealType cdf(const laplace_distribution& dist, const Re } // cdf template -inline RealType logcdf(const laplace_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType logcdf(const laplace_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // For ADL of std functions. RealType result = 0; // Checking function argument. - const char* function = "boost::math::logcdf(const laplace_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::logcdf(const laplace_distribution<%1%>&, %1%)"; // Check scale and location. if (false == dist.check_parameters(function, &result)) { @@ -273,13 +278,13 @@ inline RealType logcdf(const laplace_distribution& dist, const } // logcdf template -inline RealType quantile(const laplace_distribution& dist, const RealType& p) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const laplace_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std functions. // Checking function argument RealType result = 0; - const char* function = "boost::math::quantile(const laplace_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const laplace_distribution<%1%>&, %1%)"; if (false == dist.check_parameters(function, &result)) return result; if(false == detail::check_probability(function, p, &result, Policy())) return result; @@ -311,7 +316,7 @@ inline RealType quantile(const laplace_distribution& dist, con template -inline RealType cdf(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { // Calculate complement of cdf. BOOST_MATH_STD_USING // for ADL of std functions @@ -322,7 +327,7 @@ inline RealType cdf(const complemented2_type -inline RealType logcdf(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType logcdf(const complemented2_type, RealType>& c) { // Calculate complement of logcdf. BOOST_MATH_STD_USING // for ADL of std functions @@ -359,7 +364,7 @@ inline RealType logcdf(const complemented2_type -inline RealType quantile(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions. 
@@ -400,17 +405,17 @@ inline RealType quantile(const complemented2_type::infinity(); + return boost::math::numeric_limits::infinity(); } if(q == 1) { - return -std::numeric_limits::infinity(); + return -boost::math::numeric_limits::infinity(); } if(false == detail::check_probability(function, q, &result, Policy())) return result; @@ -424,49 +429,49 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const laplace_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mean(const laplace_distribution& dist) { return dist.location(); } template -inline RealType standard_deviation(const laplace_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType standard_deviation(const laplace_distribution& dist) { return constants::root_two() * dist.scale(); } template -inline RealType mode(const laplace_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mode(const laplace_distribution& dist) { return dist.location(); } template -inline RealType median(const laplace_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType median(const laplace_distribution& dist) { return dist.location(); } template -inline RealType skewness(const laplace_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline RealType skewness(const laplace_distribution& /*dist*/) { return 0; } template -inline RealType kurtosis(const laplace_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const laplace_distribution& /*dist*/) { return 6; } template -inline RealType kurtosis_excess(const laplace_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const laplace_distribution& /*dist*/) { return 3; } template -inline RealType entropy(const laplace_distribution & dist) +BOOST_MATH_GPU_ENABLED inline RealType entropy(const laplace_distribution & dist) { using std::log; return log(2*dist.scale()*constants::e()); diff --git a/include/boost/math/distributions/logistic.hpp b/include/boost/math/distributions/logistic.hpp index d12de48c59..56dc6e9f2f 100644 --- a/include/boost/math/distributions/logistic.hpp +++ b/include/boost/math/distributions/logistic.hpp @@ -1,5 +1,5 @@ // Copyright 2008 Gautam Sewani -// +// Copyright 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt @@ -8,12 +8,17 @@ #ifndef BOOST_MATH_DISTRIBUTIONS_LOGISTIC #define BOOST_MATH_DISTRIBUTIONS_LOGISTIC +#include +#include +#include +#include #include #include #include #include #include -#include +#include +#include namespace boost { namespace math { @@ -24,22 +29,22 @@ namespace boost { namespace math { typedef RealType value_type; typedef Policy policy_type; - logistic_distribution(RealType l_location=0, RealType l_scale=1) // Constructor. + BOOST_MATH_GPU_ENABLED logistic_distribution(RealType l_location=0, RealType l_scale=1) // Constructor. : m_location(l_location), m_scale(l_scale) { - static const char* function = "boost::math::logistic_distribution<%1%>::logistic_distribution"; + constexpr auto function = "boost::math::logistic_distribution<%1%>::logistic_distribution"; RealType result; detail::check_scale(function, l_scale, &result, Policy()); detail::check_location(function, l_location, &result, Policy()); } // Accessor functions. 
- RealType scale()const + BOOST_MATH_GPU_ENABLED RealType scale()const { return m_scale; } - RealType location()const + BOOST_MATH_GPU_ENABLED RealType location()const { return m_location; } @@ -60,26 +65,26 @@ namespace boost { namespace math { #endif template - inline const std::pair range(const logistic_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair range(const logistic_distribution& /* dist */) { // Range of permissible values for random variable x. using boost::math::tools::max_value; - return std::pair( - std::numeric_limits::has_infinity ? -std::numeric_limits::infinity() : -max_value(), - std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : max_value()); + return boost::math::pair( + boost::math::numeric_limits::has_infinity ? -boost::math::numeric_limits::infinity() : -max_value(), + boost::math::numeric_limits::has_infinity ? boost::math::numeric_limits::infinity() : max_value()); } template - inline const std::pair support(const logistic_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair support(const logistic_distribution& /* dist */) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. using boost::math::tools::max_value; - return std::pair(-max_value(), max_value()); // - to + infinity + return boost::math::pair(-max_value(), max_value()); // - to + infinity } template - inline RealType pdf(const logistic_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType pdf(const logistic_distribution& dist, const RealType& x) { - static const char* function = "boost::math::pdf(const logistic_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::pdf(const logistic_distribution<%1%>&, %1%)"; RealType scale = dist.scale(); RealType location = dist.location(); RealType result = 0; @@ -114,12 +119,12 @@ namespace boost { namespace math { } template - inline RealType cdf(const logistic_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const logistic_distribution& dist, const RealType& x) { RealType scale = dist.scale(); RealType location = dist.location(); RealType result = 0; // of checks. - static const char* function = "boost::math::cdf(const logistic_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const logistic_distribution<%1%>&, %1%)"; if(false == detail::check_scale(function, scale, &result, Policy())) { return result; @@ -149,12 +154,12 @@ namespace boost { namespace math { } template - inline RealType logcdf(const logistic_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType logcdf(const logistic_distribution& dist, const RealType& x) { RealType scale = dist.scale(); RealType location = dist.location(); RealType result = 0; // of checks. 
- static const char* function = "boost::math::cdf(const logistic_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const logistic_distribution<%1%>&, %1%)"; if(false == detail::check_scale(function, scale, &result, Policy())) { return result; @@ -192,13 +197,13 @@ namespace boost { namespace math { } template - inline RealType quantile(const logistic_distribution& dist, const RealType& p) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const logistic_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING RealType location = dist.location(); RealType scale = dist.scale(); - static const char* function = "boost::math::quantile(const logistic_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const logistic_distribution<%1%>&, %1%)"; RealType result = 0; if(false == detail::check_scale(function, scale, &result, Policy())) @@ -228,13 +233,13 @@ namespace boost { namespace math { } // RealType quantile(const logistic_distribution& dist, const RealType& p) template - inline RealType cdf(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING RealType location = c.dist.location(); RealType scale = c.dist.scale(); RealType x = c.param; - static const char* function = "boost::math::cdf(const complement(logistic_distribution<%1%>&), %1%)"; + constexpr auto function = "boost::math::cdf(const complement(logistic_distribution<%1%>&), %1%)"; RealType result = 0; if(false == detail::check_scale(function, scale, &result, Policy())) @@ -263,13 +268,13 @@ namespace boost { namespace math { } template - inline RealType logcdf(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType logcdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING RealType location = c.dist.location(); RealType scale = c.dist.scale(); RealType x = c.param; - static const char* function = "boost::math::cdf(const complement(logistic_distribution<%1%>&), %1%)"; + constexpr auto function = "boost::math::cdf(const complement(logistic_distribution<%1%>&), %1%)"; RealType result = 0; if(false == detail::check_scale(function, scale, &result, Policy())) @@ -299,12 +304,12 @@ namespace boost { namespace math { } template - inline RealType quantile(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING RealType scale = c.dist.scale(); RealType location = c.dist.location(); - static const char* function = "boost::math::quantile(const complement(logistic_distribution<%1%>&), %1%)"; + constexpr auto function = "boost::math::quantile(const complement(logistic_distribution<%1%>&), %1%)"; RealType result = 0; if(false == detail::check_scale(function, scale, &result, Policy())) return result; @@ -335,13 +340,13 @@ namespace boost { namespace math { } template - inline RealType mean(const logistic_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mean(const logistic_distribution& dist) { return dist.location(); } // RealType mean(const logistic_distribution& dist) template - inline RealType variance(const logistic_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType variance(const logistic_distribution& dist) { BOOST_MATH_STD_USING RealType scale = dist.scale(); @@ -349,36 +354,36 @@ namespace boost { namespace math { } // RealType variance(const logistic_distribution& dist) template - inline RealType mode(const logistic_distribution& dist) + 
BOOST_MATH_GPU_ENABLED inline RealType mode(const logistic_distribution& dist) { return dist.location(); } template - inline RealType median(const logistic_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType median(const logistic_distribution& dist) { return dist.location(); } template - inline RealType skewness(const logistic_distribution& /*dist*/) + BOOST_MATH_GPU_ENABLED inline RealType skewness(const logistic_distribution& /*dist*/) { return 0; } // RealType skewness(const logistic_distribution& dist) template - inline RealType kurtosis_excess(const logistic_distribution& /*dist*/) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const logistic_distribution& /*dist*/) { return static_cast(6)/5; } // RealType kurtosis_excess(const logistic_distribution& dist) template - inline RealType kurtosis(const logistic_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const logistic_distribution& dist) { return kurtosis_excess(dist) + 3; } // RealType kurtosis_excess(const logistic_distribution& dist) template - inline RealType entropy(const logistic_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType entropy(const logistic_distribution& dist) { using std::log; return 2 + log(dist.scale()); diff --git a/include/boost/math/distributions/lognormal.hpp b/include/boost/math/distributions/lognormal.hpp index 3c8f576e56..dfc3e4b2a2 100644 --- a/include/boost/math/distributions/lognormal.hpp +++ b/include/boost/math/distributions/lognormal.hpp @@ -1,4 +1,5 @@ // Copyright John Maddock 2006. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,12 +11,15 @@ // http://mathworld.wolfram.com/LogNormalDistribution.html // http://en.wikipedia.org/wiki/Lognormal_distribution +#include +#include +#include #include #include #include #include - -#include +#include +#include namespace boost{ namespace math { @@ -23,7 +27,7 @@ namespace detail { template - inline bool check_lognormal_x( + BOOST_MATH_GPU_ENABLED inline bool check_lognormal_x( const char* function, RealType const& x, RealType* result, const Policy& pol) @@ -48,7 +52,7 @@ class lognormal_distribution typedef RealType value_type; typedef Policy policy_type; - lognormal_distribution(RealType l_location = 0, RealType l_scale = 1) + BOOST_MATH_GPU_ENABLED lognormal_distribution(RealType l_location = 0, RealType l_scale = 1) : m_location(l_location), m_scale(l_scale) { RealType result; @@ -56,12 +60,12 @@ class lognormal_distribution detail::check_location("boost::math::lognormal_distribution<%1%>::lognormal_distribution", l_location, &result, Policy()); } - RealType location()const + BOOST_MATH_GPU_ENABLED RealType location()const { return m_location; } - RealType scale()const + BOOST_MATH_GPU_ENABLED RealType scale()const { return m_scale; } @@ -83,29 +87,29 @@ lognormal_distribution(RealType,RealType)->lognormal_distribution -inline const std::pair range(const lognormal_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline const boost::math::pair range(const lognormal_distribution& /*dist*/) { // Range of permissible values for random variable x is >0 to +infinity. 
using boost::math::tools::max_value; - return std::pair(static_cast(0), max_value()); + return boost::math::pair(static_cast(0), max_value()); } template -inline const std::pair support(const lognormal_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline const boost::math::pair support(const lognormal_distribution& /*dist*/) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. using boost::math::tools::max_value; - return std::pair(static_cast(0), max_value()); + return boost::math::pair(static_cast(0), max_value()); } template -RealType pdf(const lognormal_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED RealType pdf(const lognormal_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions RealType mu = dist.location(); RealType sigma = dist.scale(); - static const char* function = "boost::math::pdf(const lognormal_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::pdf(const lognormal_distribution<%1%>&, %1%)"; RealType result = 0; if(0 == detail::check_scale(function, sigma, &result, Policy())) @@ -129,11 +133,11 @@ RealType pdf(const lognormal_distribution& dist, const RealTyp } template -inline RealType cdf(const lognormal_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const lognormal_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(const lognormal_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const lognormal_distribution<%1%>&, %1%)"; RealType result = 0; if(0 == detail::check_scale(function, dist.scale(), &result, Policy())) @@ -151,11 +155,11 @@ inline RealType cdf(const lognormal_distribution& dist, const } template -inline RealType quantile(const lognormal_distribution& dist, const RealType& p) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const lognormal_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const lognormal_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const lognormal_distribution<%1%>&, %1%)"; RealType result = 0; if(0 == detail::check_scale(function, dist.scale(), &result, Policy())) @@ -175,11 +179,11 @@ inline RealType quantile(const lognormal_distribution& dist, c } template -inline RealType cdf(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(const lognormal_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const lognormal_distribution<%1%>&, %1%)"; RealType result = 0; if(0 == detail::check_scale(function, c.dist.scale(), &result, Policy())) @@ -197,11 +201,11 @@ inline RealType cdf(const complemented2_type -inline RealType quantile(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const lognormal_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const lognormal_distribution<%1%>&, %1%)"; RealType result = 0; if(0 == detail::check_scale(function, c.dist.scale(), &result, Policy())) @@ -221,7 +225,7 @@ inline RealType quantile(const 
@@ -221,7 +225,7 @@ inline RealType quantile(const complemented2_type<lognormal_distribution<RealType, Policy>, RealType>& c)
 }
 
 template <class RealType, class Policy>
-inline RealType mean(const lognormal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mean(const lognormal_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING  // for ADL of std functions
 
@@ -238,7 +242,7 @@ inline RealType mean(const lognormal_distribution<RealType, Policy>& dist)
 }
 
 template <class RealType, class Policy>
-inline RealType variance(const lognormal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType variance(const lognormal_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING  // for ADL of std functions
 
@@ -255,7 +259,7 @@ inline RealType variance(const lognormal_distribution<RealType, Policy>& dist)
 }
 
 template <class RealType, class Policy>
-inline RealType mode(const lognormal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mode(const lognormal_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING  // for ADL of std functions
 
@@ -272,7 +276,7 @@ inline RealType mode(const lognormal_distribution<RealType, Policy>& dist)
 }
 
 template <class RealType, class Policy>
-inline RealType median(const lognormal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType median(const lognormal_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING  // for ADL of std functions
    RealType mu = dist.location();
@@ -280,7 +284,7 @@ inline RealType median(const lognormal_distribution<RealType, Policy>& dist)
 }
 
 template <class RealType, class Policy>
-inline RealType skewness(const lognormal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const lognormal_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING  // for ADL of std functions
 
@@ -300,7 +304,7 @@ inline RealType skewness(const lognormal_distribution<RealType, Policy>& dist)
 }
 
 template <class RealType, class Policy>
-inline RealType kurtosis(const lognormal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const lognormal_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING  // for ADL of std functions
 
@@ -318,7 +322,7 @@ inline RealType kurtosis(const lognormal_distribution<RealType, Policy>& dist)
 }
 
 template <class RealType, class Policy>
-inline RealType kurtosis_excess(const lognormal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const lognormal_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING  // for ADL of std functions
 
@@ -336,9 +340,9 @@ inline RealType kurtosis_excess(const lognormal_distribution<RealType, Policy>& dist)
 }
 
 template <class RealType, class Policy>
-inline RealType entropy(const lognormal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType entropy(const lognormal_distribution<RealType, Policy>& dist)
 {
-   using std::log;
+   BOOST_MATH_STD_USING
    RealType mu = dist.location();
    RealType sigma = dist.scale();
    return mu + log(constants::two_pi<RealType>()*constants::e<RealType>()*sigma*sigma)/2;
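What follows is an entirely new header implementing the map-Airy distribution. Its public interface is not shown in this excerpt, so the sketch below assumes it mirrors the other distributions in the library (two-parameter constructor with location defaulting to 0 and scale to 1, plus the usual free-function accessors):

#include <boost/math/distributions/mapairy.hpp>
#include <iostream>

int main()
{
    const boost::math::mapairy_distribution<double> dist(0.0, 1.0);
    // pdf() standardises x to u = (x - location) / scale and then forwards to
    // mapairy_pdf_plus_imp_prec for u >= 0 or mapairy_pdf_minus_imp_prec for
    // u < 0, with the precision variant chosen at compile time.
    std::cout << pdf(dist, 1.5) << '\n' << pdf(dist, -1.5) << '\n';
}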
diff --git a/include/boost/math/distributions/mapairy.hpp b/include/boost/math/distributions/mapairy.hpp
new file mode 100644
index 0000000000..8bf1f990c1
--- /dev/null
+++ b/include/boost/math/distributions/mapairy.hpp
@@ -0,0 +1,4220 @@
+// Copyright Takuma Yoshimura 2024.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_STATS_MAPAIRY_HPP
+#define BOOST_STATS_MAPAIRY_HPP
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4127) // conditional expression is constant
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifndef BOOST_MATH_HAS_NVRTC
+#include
+#include
+#endif
+
+namespace boost { namespace math {
+template <typename RealType = double, typename Policy = policies::policy<> >
+class mapairy_distribution;
+
+namespace detail {
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_pdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 1) {
+        // Rational Approximation
+        // Maximum Relative Error: 3.7591e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(1.97516171847191855610e-1),
+            static_cast<RealType>(3.67488253628465083737e-2),
+            static_cast<RealType>(-9.73242224038828612673e-4),
+            static_cast<RealType>(2.32207514136635673061e-3),
+            static_cast<RealType>(5.69067907423210669037e-5),
+            static_cast<RealType>(-6.02637387141524535193e-5),
+            static_cast<RealType>(1.04960324426666933327e-5),
+            static_cast<RealType>(-6.58470237954242016920e-7),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(7.09464351647314165710e-1),
+            static_cast<RealType>(3.66413036246461392316e-1),
+            static_cast<RealType>(1.10947882302862241488e-1),
+            static_cast<RealType>(2.65928486676817177159e-2),
+            static_cast<RealType>(3.75507284977386290874e-3),
+            static_cast<RealType>(4.03789594641339005785e-4),
+        };
+
+        result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x);
+    }
+    else if (x < 2) {
+        RealType t = x - 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.5996e-20
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(1.06251243013238748252e-1),
+            static_cast<RealType>(1.38178831205785069108e-2),
+            static_cast<RealType>(4.19280374368049006206e-3),
+            static_cast<RealType>(8.54607219684690930289e-4),
+            static_cast<RealType>(-7.46881084120928210702e-5),
+            static_cast<RealType>(1.47110856483345063335e-5),
+            static_cast<RealType>(-1.30090180307471994500e-6),
+            static_cast<RealType>(5.24801123304330014713e-8),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(8.10853683888611687140e-1),
+            static_cast<RealType>(3.89361261627717143905e-1),
+            static_cast<RealType>(1.15124062681082170577e-1),
+            static_cast<RealType>(2.38803416611949902468e-2),
+            static_cast<RealType>(3.08616898814509065071e-3),
+            static_cast<RealType>(2.43760043942846261876e-4),
+            static_cast<RealType>(1.34538901435238836768e-6),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 4) {
+        RealType t = x - 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.1592e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(5.33842514891989443409e-2),
+            static_cast<RealType>(1.23301980674903270971e-2),
+            static_cast<RealType>(3.45717831433988631923e-3),
+            static_cast<RealType>(3.27034449923176875761e-4),
+            static_cast<RealType>(1.20406794831890291348e-5),
+            static_cast<RealType>(5.77489170397965604669e-7),
+            static_cast<RealType>(-1.15255267205685159063e-7),
+            static_cast<RealType>(9.15896323073109992939e-9),
+            static_cast<RealType>(-3.14068002815368247985e-10),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(9.08772985520393226044e-1),
+            static_cast<RealType>(4.26418573702560818267e-1),
+            static_cast<RealType>(1.22033746594868893316e-1),
+            static_cast<RealType>(2.27934009200310243172e-2),
+            static_cast<RealType>(2.60658999011198623962e-3),
+            static_cast<RealType>(1.54461660261435227768e-4),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 8) {
RealType t = x - 4; + + // Rational Approximation + // Maximum Relative Error: 9.2228e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(1.58950538583133457384e-2), + static_cast(7.47835440063141601948e-3), + static_cast(1.81137244353261478410e-3), + static_cast(2.26935565382135588558e-4), + static_cast(1.43877113825683795505e-5), + static_cast(2.08242747557417233626e-7), + static_cast(-1.54976465724771282989e-9), + static_cast(1.30762989300333026019e-11), + }; + BOOST_MATH_STATIC const RealType Q[8] = { + static_cast(1.), + static_cast(9.95505437381674174441e-1), + static_cast(4.58882737262511297099e-1), + static_cast(1.25031310192148865496e-1), + static_cast(2.15727229249904102247e-2), + static_cast(2.33597081566665672569e-3), + static_cast(1.45198998318300328562e-4), + static_cast(3.87962234445835345676e-6), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 16) { + RealType t = x - 8; + + // Rational Approximation + // Maximum Relative Error: 1.0257e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(3.22517551525042172428e-3), + static_cast(1.12822806030796339659e-3), + static_cast(1.54489389961322571031e-4), + static_cast(9.28479992527909796427e-6), + static_cast(2.06168350199745832262e-7), + static_cast(9.05110751997021418539e-10), + static_cast(-2.15498112371756202097e-12), + static_cast(6.41838355699777435924e-15), + }; + BOOST_MATH_STATIC const RealType Q[8] = { + static_cast(1.), + static_cast(6.53390465399680164234e-1), + static_cast(1.82759048270449018482e-1), + static_cast(2.80407546367978533849e-2), + static_cast(2.50853443923476718145e-3), + static_cast(1.27671852825846245421e-4), + static_cast(3.28380135691060279203e-6), + static_cast(3.06545317089055335742e-8), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 32) { + RealType t = x - 16; + + // Rational Approximation + // Maximum Relative Error: 6.0510e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(5.82527663232857270992e-4), + static_cast(6.89502117025124630567e-5), + static_cast(2.24909795087265741433e-6), + static_cast(2.18576787334972903790e-8), + static_cast(3.39014723444178274435e-11), + static_cast(-9.74481309265612390297e-15), + static_cast(-1.13308546492906818388e-16), + static_cast(5.32472028720777735712e-19), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(2.74018883667663396766e-1), + static_cast(2.95901195665990089660e-2), + static_cast(1.57901733512147920251e-3), + static_cast(4.24965124147621236633e-5), + static_cast(5.17522027193205842016e-7), + static_cast(2.00522219276570039934e-9), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 64) { + RealType t = x - 32; + + // Rational Approximation + // Maximum Relative Error: 7.3294e-18 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(1.03264853379349880039e-4), + static_cast(5.35256306644392405447e-6), + static_cast(9.00657716972118816692e-8), + static_cast(5.34913574042209793720e-10), + static_cast(6.70752605041678779380e-13), + static_cast(-5.30089923101856817552e-16), + static_cast(7.28133811621687143754e-19), + static_cast(-7.38047553655951666420e-22), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.29920843258164337377e-1), + static_cast(6.75018577147646502386e-3), + static_cast(1.77694968039695671819e-4), + static_cast(2.46428299911920942946e-6), + 
static_cast(1.67165053157990942546e-8), + static_cast(4.19496974141131087116e-11), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else { + RealType t = 1 / sqrt(x * x * x); + + // Rational Approximation + // Maximum Relative Error: 5.6693e-20 + BOOST_MATH_STATIC const RealType P[5] = { + static_cast(5.98413420602149016910e-1), + static_cast(3.14584075817417883086e-5), + static_cast(1.62977928311793051895e1), + static_cast(-4.12903117172994371875e-4), + static_cast(-1.06404478702135751872e2), + }; + BOOST_MATH_STATIC const RealType Q[3] = { + static_cast(1.), + static_cast(5.25696892802060720079e-5), + static_cast(4.03600055498020483920e1), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t / x; + } + + return result; +} + + +template +BOOST_MATH_GPU_ENABLED inline RealType mapairy_pdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant&) +{ + BOOST_MATH_STD_USING + RealType result; + + if (x < 1) { + // Rational Approximation + // Maximum Relative Error: 7.8308e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.97516171847191855609649452292217911973e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17531822787252717270400174744562144891e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.85115358761409188259685286269086053296e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18029395189535552537870932989876189597e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.77412874842522285996566741532939343827e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.77992070255086842672551073580133785334e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.54573264286260796576738952968288691782e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.94764012694602906119831079380500255557e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.97596258932025712802674070104281981323e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.45466169112247379589927514614067756956e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.99760415118300349769641418430273526815e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43150486566834492207695241913522311930e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46130347604880355784938321408765318948e-13), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11845869711743584628289654085905424438e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.80391154854347711297249357734993136108e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.75628443538173255184583966965162835227e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41016303833742742212624596040074202424e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.19142300833563644046500846364541891138e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02421707708633106515934651956262614532e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.03973732602338507411104824853671547615e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.35206168908201402570766383018708660819e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.38602606623008690327520130558254165564e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.53740175911385378188372963739884519312e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.27513004715414297729539702862351044344e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.54510493017251997793679126704007098265e-8), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x); + } + else if (x < 2) { + 
RealType t = x - 1; + + // Rational Approximation + // Maximum Relative Error: 3.0723e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[12] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06251243013238748252181151646220197947e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.92438638323563234519452281479338921158e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.83335793178622701784730867677919844599e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.84159075203218824591724451142550478306e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.04213732090358859917896442076931334722e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.72388220651785798237487005913708387756e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.36099324022668533012286817710272936865e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.74483270731217433628720245792741986795e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.56461597064783966758904403291149549559e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.28590608939674970691948223694855264817e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.81756745849477762773082030302943341729e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.65915115243311285178083515017249358853e-12), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33250387018216706082200927591739589024e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.71707718560216685629188467984384070512e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.81316277289673837399162302797006618384e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78475951599121894570443981591530879087e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.16167801098514576400689883575304687623e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19167794366424137722223009369062644830e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.20831082064982892777497773490792080382e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.27196399162146247210036306870401328410e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.79335434374966775903734846875100087590e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.30825409557870847168672662674521614782e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.97296173230649275943984471731360073540e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.48943057909563158917114503727080517958e-9), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 4) { + RealType t = x - 2; + + // Rational Approximation + // Maximum Relative Error: 4.0903e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.33842514891989443409465171800884519331e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.53264053296761245408991932692426094424e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.23210520807186629205810670362048049836e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.71104271443590027208545022968625306496e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.98781446716778138729774954595209697813e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.98895829308616657174932023565302947632e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.25993639218721804661037829873135732687e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.64669776700609853276056375742089715662e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.11846243382610611156151291892877027869e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.74830086064868141326053648144496072795e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
2.07549997153431643849551871367000763445e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.10030596535721362628619523622308581344e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19376016170255697546854583591494809062e-13), + }; + BOOST_MATH_STATIC const RealType Q[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52686177278870816414637961315363468426e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19872083945442288336636376283295310445e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.26633866969676511944680471882188527224e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41261867539396133951024374504099977090e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.18852182132645783844766153200510014113e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70152126044106007357033814742158353948e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.23810508827493234517751339979902448944e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.96161313274648769113605163816403306967e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.06693316156193327359541953619174255726e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.79366356086062616343285660797389238271e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.14585835815353770175366834099001313472e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.05314631662369743547568064896403143693e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.90325380271096603676911761784650800378e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.36933359079566550212098911224675011839e-12), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 8) { + RealType t = x - 4; + + // Rational Approximation + // Maximum Relative Error: 6.5015e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.58950538583133457383574346194006716984e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.25447644411503971725638816502617490834e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47605882774114100209665040117276675598e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.12224864838900383464124716266085521485e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.79164249640537972514574059182421325541e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.89668438166714230032406615413991628135e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.44410389750700463263686630222653669837e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.14788978994687095829140113472609739982e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.79821680629333600844514042061772236495e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.49636960435731257154960798035854124639e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.70554745768928821263556963261516872171e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.42293994855343109617040824208078534205e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37599287094703195312894833570340165019e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.35248179978735448062307216459232932068e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.53569375838863862590910010617140120876e-18), + }; + BOOST_MATH_STATIC const RealType Q[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.94337325681904859647161946168957959628e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.77120402023938328899162557073347121463e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01644685191130734907530007424741314392e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
4.12479655123720440909164080517207084404e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.25556010526357752360439314019567992245e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.96143273204038192262150849394970544022e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.50612932318889495209230176354364299236e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.12160918304376427109905628326638480473e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.47696044292604039527013647985997661762e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.64067652576843720823459199100800335854e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.00745166063635113130434111509648306420e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.05398901239421768403763864060147286105e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05698259572340563109985785513355912114e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19362835269415404005406782719825077472e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.15444386779802728200716489787161419304e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.02452666470008756043350040893761339083e-16), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 16) { + RealType t = x - 8; + + // Rational Approximation + // Maximum Relative Error: 2.0995e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[16] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.22517551525042172427941302520759668293e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.86576974828476461442549217748945498966e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18419822818191546598384139622512477000e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.98396184944524020019688823190946146641e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.06686400532599396487775148973665625687e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.05680178109228687159829475615095925679e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.17554487015345146749705505971350254902e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.14774751685364429557883242232797329274e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33266124509168360207594600356349282805e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.76332756800842989348756910429214676252e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.60639771339252642992277508068105926919e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.41859490403554144799385471141184829903e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.77177795293424055655391515546880774987e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.76106923344461402353501262620681801053e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.68829978902134103249656805130103045021e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.42496376687241918803028631991083570963e-26), + }; + BOOST_MATH_STATIC const RealType Q[16] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19213376162053391168605415200906099633e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.65578261958732385181558047087365997878e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30046653564394292929001223763106276016e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.48388301731958697028701215596777178117e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.50873786049439122933188684993719288258e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23255654647151798865208394342856435797e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20861791399969402003082323686080041040e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
8.96882049090731653763684812243275884213e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.98669985741073085290012296575736698103e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.03383311816835346577432387682379226740e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.87320682938150375144724980774245810905e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13573468677076838075146150847170057373e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34526045003716422620879156626237175127e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.35681579117696161282979297336282783473e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92944288060269290125987728528698476197e-18), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 32) { + RealType t = x - 16; + + // Rational Approximation + // Maximum Relative Error: 2.0937e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.82527663232857270992129793621400616909e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41696401156754081476312871174198295322e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.42036620449365724707919875710197564857e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.67076745288708619632303078677641380627e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.14278954094278648593125010577441869646e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40092485054621853149602511539550254471e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.17755660009065973828053533035808718033e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.23871371557251644837598540542648782066e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04069998646037977439620128812310273053e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.94055978349016208777803296823455779097e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.29866428982892883091537921429389750973e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.06056281963023929277728535486590256573e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.57963857545037466186123981516026589992e-24), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.81390322233700529779563477285232205886e-28), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.52190981930441828041102818178755246228e-31), + }; + BOOST_MATH_STATIC const RealType Q[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.70564782441895707961338319466546005093e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47770566490107388849474183308889339231e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29364672385303439788399215507370006639e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.37279274083988250795581105436675097881e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.72124151284421794872333348562536468054e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.96970247774973902625712414297788402746e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.38395055453444011915661055983937917120e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.19605460410208704830882138883730331113e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76945301389475508747530234950023648137e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.33624384932503964160642677987886086890e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.01155130710615988897664213446593907596e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.03959317021567084067518847978890548086e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78213669817351488671519066803835958715e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
2.75492332026736176991870807903277324902e-22), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 64) { + RealType t = x - 32; + + // Rational Approximation + // Maximum Relative Error: 1.5856e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.03264853379349880038687006045193401399e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.79539964604630527636184900467871907171e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34840369549460790638336121351837912308e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.73087351972154879439617719914590729748e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.51775493325347153520115736204545037264e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.60104651860674451546102708885530128768e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.90233449697112559539826150932808197444e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06978852724410115655105118663137681992e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.00399855296672416041126220131900937128e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.18139748830278263202087699889457673035e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.43070756487288399784700274808326343543e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.70126687893706466023887757573369648552e-27), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.29405234560873665664952418690159194840e-30), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.69069082510020066864633718082941688708e-34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.33468198065176301137949068264633336529e-37), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.51951069241510130465691156908893803280e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.84647597299970149588010858770320631739e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.90239396588176334117512714878489376365e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.35551585337774834346900776840459179841e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.53375746264539501168763602838029023222e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.42421935941736734247914078641324315900e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.23835501607741697737129504173606231513e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.79603272375172813955236187874231935324e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.44624821303153251954931367754173356213e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.10635081308984534416704147448323126303e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.14627867347129520510628554651739571006e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43792928765659831045040802615903432044e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.79856365207259871336606847582889916798e-25), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else { + RealType t = 1 / sqrt(x * x * x); + + // Rational Approximation + // Maximum Relative Error: 3.5081e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[8] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.98413420602149016909919089901572802714e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.30303835860684077803651094768293625633e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.89097726237252419724261295392691855545e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.12696604472230480273239741428914666511e1), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.84517621403071494824886152940942995151e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.67577378292168927009421205756730205227e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.16343347002845084264982358165052437094e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.59558963351172885545760841064831356701e3), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.51965956124978480521462518750569617550e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.61700833299761977287211297600922591853e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.94988298508869748383898344668918510537e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.52494213749069142804725453333400335525e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20093079283917759611690534481918040882e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.82564796242972192725215815897475246715e4), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t / x; + } + + return result; +} + +template +BOOST_MATH_GPU_ENABLED inline RealType mapairy_pdf_minus_imp_prec(const RealType& x, const boost::math::integral_constant&) +{ + BOOST_MATH_STD_USING + RealType result; + + if (x >= -1) { + RealType t = x + 1; + + // Rational Approximation + // Maximum Relative Error: 3.7525e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(2.76859868856746781256e-1), + static_cast(1.10489814676299003241e-1), + static_cast(-6.25690643488236678667e-3), + static_cast(-1.17905420222527577236e-3), + static_cast(1.27188963720084274122e-3), + static_cast(-7.20575105181207907889e-5), + static_cast(-2.22575633858411851032e-5), + static_cast(2.94270091008508492304e-6), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(4.98673671503410894284e-1), + static_cast(3.15907666864554716291e-1), + static_cast(8.34463558393629855977e-2), + static_cast(2.71804643993972494173e-2), + static_cast(3.52187050938036578406e-3), + static_cast(7.03072974279509263844e-4), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -2) { + RealType t = x + 2; + + // Rational Approximation + // Maximum Relative Error: 4.0995e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(2.14483832832989822788e-1), + static_cast(3.72789690317712876663e-1), + static_cast(1.86473650057086284496e-1), + static_cast(1.31182724166379598907e-2), + static_cast(-9.00695064809774432392e-3), + static_cast(3.46884420664996747052e-4), + static_cast(4.88651392754189961173e-4), + static_cast(-6.13516242712196835055e-5), + }; + BOOST_MATH_STATIC const RealType Q[8] = { + static_cast(1.), + static_cast(1.06478618107122200489e0), + static_cast(4.08809060854459518663e-1), + static_cast(2.66617598099501800866e-1), + static_cast(4.53526315786051807494e-2), + static_cast(2.44078693689626940834e-2), + static_cast(1.52822572478697831870e-3), + static_cast(8.69480001029742502197e-4), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else { + RealType s = exp(2 * x * x * x / 27) * sqrt(-x); + + if (x >= -4) { + RealType t = -x - 2; + + // Rational Approximation + // Maximum Relative Error: 2.4768e-18 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(2.74308494787955998605e-1), + static_cast(4.87765991440983416392e-1), + static_cast(3.84524365110270427617e-1), + static_cast(1.77409497505926097339e-1), + 
static_cast(5.25612864287310961520e-2), + static_cast(1.01528615034079765421e-2), + static_cast(1.20417225696161842090e-3), + static_cast(6.97462693097107007719e-5), + }; + BOOST_MATH_STATIC const RealType Q[8] = { + static_cast(1.), + static_cast(1.81256903248465876424e0), + static_cast(1.43959302060852067876e0), + static_cast(6.65882284117861804351e-1), + static_cast(1.97537712781845593211e-1), + static_cast(3.81732970028510912201e-2), + static_cast(4.52767489928026542226e-3), + static_cast(2.62240194911920120003e-4), + }; + + result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -8) { + RealType t = -x - 4; + + // Rational Approximation + // Maximum Relative Error: 1.5741e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(2.67391547707456587286e-1), + static_cast(3.39319035621314371924e-1), + static_cast(1.85434799940724207230e-1), + static_cast(5.63667456320679857693e-2), + static_cast(1.01231164548944177474e-2), + static_cast(1.02501575174439362864e-3), + static_cast(4.60769537123286016400e-5), + static_cast(-4.92754650783224582641e-13), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.27271216837333318516e0), + static_cast(6.96551952883867277759e-1), + static_cast(2.11871363524516350422e-1), + static_cast(3.80622887806509632537e-2), + static_cast(3.85400280812991562328e-3), + static_cast(1.73246593953823694311e-4), + }; + + result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -16) { + RealType t = -x - 8; + + // Rational Approximation + // Maximum Relative Error: 4.6579e-17 + BOOST_MATH_STATIC const RealType P[6] = { + static_cast(2.66153901932100301337e-1), + static_cast(1.65767350677458230714e-1), + static_cast(4.19801402197670061146e-2), + static_cast(5.39337995172784579558e-3), + static_cast(3.50811247702301287586e-4), + static_cast(9.21758454778883157515e-6), + }; + BOOST_MATH_STATIC const RealType Q[6] = { + static_cast(1.), + static_cast(6.23092941554668369107e-1), + static_cast(1.57829914506366827914e-1), + static_cast(2.02787979758160988615e-2), + static_cast(1.31903008994475216511e-3), + static_cast(3.46575870637847438219e-5), + }; + + result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -32) { + RealType t = -x - 16; + + // Rational Approximation + // Maximum Relative Error: 5.2014e-17 + BOOST_MATH_STATIC const RealType P[5] = { + static_cast(2.65985830928929730672e-1), + static_cast(7.19655029633308583205e-2), + static_cast(7.26293125679558421946e-3), + static_cast(3.24276402295343802262e-4), + static_cast(5.40508013573989841127e-6), + }; + BOOST_MATH_STATIC const RealType Q[5] = { + static_cast(1.), + static_cast(2.70578525590448009961e-1), + static_cast(2.73082032706004833847e-2), + static_cast(1.21926059813954504560e-3), + static_cast(2.03227900426552177849e-5), + }; + + result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else { + result = 0; + } + } + + return result; +} + +template +BOOST_MATH_GPU_ENABLED inline RealType mapairy_pdf_minus_imp_prec(const RealType& x, const boost::math::integral_constant&) +{ + BOOST_MATH_STD_USING + RealType result; + + if (x >= -1) { + RealType t = x + 1; + + // Rational Approximation + // Maximum Relative Error: 5.2870e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.76859868856746781256050397658493368372e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 
113, 2.13037642242224438972685982606987140111e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.93206268361082760254653961897373271146e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12844418906916902333116398594921450782e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.36889326770180267250286619759335338794e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.95272615884641416804001553871108995422e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.53808638264746233799776679481568171506e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.92177790427881393122479399837010657693e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.93492737815019893169693306410980499366e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.87510085148730083683110532987841223544e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.28469424017979299382094276157986775969e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.83693904015623816528442886551032709693e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.77632857558257155545506847333166147492e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00448215148716947837105979735199471601e-11), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.69069814466926608209872727645156315374e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.89657828158127300370734997707096744077e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.62713433978940724622996782534485162816e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.91600878366366974062522408704458777166e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.89144035500328704769924414014440238441e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.35263616916053275381069097012458200491e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.49136684724986851824746531490006769036e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65912003138912073317982729161392623277e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.65931144405541620572732754508534372034e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.40193555853535182510951061797573338442e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.43625211359756249232841566256877823039e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.33207781577559817130740123609636060998e-8), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -2) { + RealType t = x + 2; + + // Rational Approximation + // Maximum Relative Error: 1.1977e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.14483832832989822788477500521594411868e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75657192307644021285091474845448102656e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40437358470633234235031852091608646844e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.66609942512054705023295445270747546208e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.54563774151184610728476161049657676321e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.51479057544157089574005315379453615537e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.59853789372610909788599341307719626846e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.76919062715142378209907670793921883406e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.58572738466179822770103948740437237476e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.66618046393835590932491510543557226290e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.26253044828460469263564567571249315188e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
1.11130363073235247786909976446790746902e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.49023728251751416730708805268921994420e-10), + }; + BOOST_MATH_STATIC const RealType Q[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.11919889346080886194925406930280687022e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.99082771425048574611745923487528183522e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99525320878512488641033584061027063035e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.20775109959302182467696345673111724657e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.67505804311611026128557007926613964162e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.77854913919309273628222660024596583623e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.91661599559554233157812211199256222756e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.83924945472605861063053622956144354568e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.84286353909650034923710426843028632590e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57737060659799463556626420070111210218e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76047305116625604109657617040360402976e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.86975509621224474718728318687795215895e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71646204381423826495116781730719271111e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30359141441663007574346497273327240071e-9), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else { + RealType s = exp(2 * x * x * x / 27) * sqrt(-x); + + if (x >= -4) { + RealType t = -x - 2; + + // Rational Approximation + // Maximum Relative Error: 5.4547e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74308494787955998605105974174143750745e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.56767876568276519015214709629156760546e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23402577465454790961498400214198520261e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09577559351834952074671208183548972395e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.76209118910349927892265971592071407626e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.09368637728788364895148841703533651597e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09003822946777310058789032386408519829e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.02362804210869367995322279203786166303e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.67210045349462046966360849113168808620e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17170437120510484976042000272825166724e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62068279517157268391045945672600042900e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.72238125522303876741011786930129571553e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33906175951716762094473406744654874848e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.88118741063309731598638313174835288433e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78908322579081615215057968216358892954e-9), + }; + BOOST_MATH_STATIC const RealType Q[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.15777668058369565739250784347385217839e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.58275582332060589146223977924181161908e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.08890987062755381429904193744273374370e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.53062680969750921573862970262146744660e0), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15983695707064161504470525373678920004e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.09120624447001177857109399158887656977e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13566107440776375294261717406754395407e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.50716565210262652091950832287627406780e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.40417354541359829249609883808591989082e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.09285589734746898623782466689035549135e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.47580156629757526370271002425784456931e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.03479533688660179064728081632921439825e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.58728676819719406366664644282113323077e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.72685000369623096389026353785111272994e-9), + }; + // LCOV_EXCL_STOP + result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -8) { + RealType t = -x - 4; + + // Rational Approximation + // Maximum Relative Error: 1.8813e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.67391547707456587286086623414017962238e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.69944730920904699720804320295067934914e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.80384452804523880914883464295008532437e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.74832028145199140240423863864148009059e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71728522451977382202061046054643165624e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.91023495084678296967637417245526177858e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57730498044529764612538979048001166775e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31940820074475947691746555183209863058e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.54175805821840981842505041345112198286e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.31350452337838677820161124238784043790e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.52175993144502511705213771924810467309e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.85684239411667243910736588216628677445e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.27124210379062272403030391492854565008e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17645475312219452046348851569796494059e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06306499345515479193219487228315566344e-11), + }; + BOOST_MATH_STATIC const RealType Q[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13521398369589479131299586715604029947e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.17680254721938920978999949995837883884e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40693619288419980101309080614788657638e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.44930162913500531579305526795523256972e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22044272115074113804712893993125987243e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.92745159832354238503828226333417152767e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24766164774700476810039401793119553409e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.08325637569571782180723187639357833929e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74954547353553788519997212700557196088e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.82800744682204649977844278025855329390e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.20210992299988298543034791887173754015e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 
113, 1.22996819257926038785424888617824130286e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.42340212199922656577943251139931264313e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.75700556749505163188370496864513941614e-11), + }; + // LCOV_EXCL_STOP + result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -16) { + RealType t = -x - 8; + + // Rational Approximation + // Maximum Relative Error: 3.7501e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.66153901932100301337118653561328446399e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.52542079386371212946566450189144670788e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.17560936304516198261138159102435548430e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.26792904240601626330507068992045446428e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.15418212265160879313643948347460896640e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.05247220687656529725024232836519908641e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.64228534097787946289779529645800775231e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.85634097697132464418223150629017524118e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.49585420710073223183176764488210189671e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.48871040740917898530270248991342594461e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.42577266655992039477272273926475476183e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19214263302271253341410568192952269518e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36635313919771528255819112450043338510e-12), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32484755553196872705775494679365596205e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.17714315014480774542066462899317631393e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.10789882607024692577764888497624620277e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.09821963157449764169644456445120769215e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52354198870000121894280965999352991441e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.12133327236256081067100384182795121111e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.20187894923874357333806454001674518211e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.69039238999927049119096278882765161803e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.35737444680219098802811205475695127060e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.54403624143647064402264521374546365073e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.24233005893817070145949404296998119469e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.89735152971223120087721392400123727326e-12), + }; + // LCOV_EXCL_STOP + result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -32) { + RealType t = -x - 16; + + // Rational Approximation + // Maximum Relative Error: 9.2696e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[12] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65985830928929730672052407058361701971e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.80409998734303497641108024806388734755e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.49286120625421787109350223436127409819e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.89160491404149422833016337592047445082e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16725811789351893632348469796802834008e-3), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.43438517439595919021069131504449842238e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.29058184637190638359623120253986595623e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.03288592271246432030980385908922413497e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.12286831076824535034975676306286388291e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.64563161552001551475186730009447111173e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.13183856815615371136129883169639301710e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.02405342795439598418033139109649640085e-35), + }; + BOOST_MATH_STATIC const RealType Q[11] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.78286317363568496229516074305435186276e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06519013547074134846431611115576250187e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.71907733798006110542919988654989891098e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.38874744033460851257697736304200953873e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.54724289412996188575775800547576856966e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.98922099980447626797646560786207812928e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.64352676367403443733555974471752023206e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92616898324742524009679754162620171773e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.87471345773127482399498877510153906820e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92954174836731254818376396170511443820e-12), + }; + // LCOV_EXCL_STOP + result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -64) { + RealType t = -x - 32; + + // Rational Approximation + // Maximum Relative Error: 2.3524e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[10] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65964563346442080104568381680822923977e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.77958685324702990033291591478515962894e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.56419338083136866686699803771820491401e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.82465178504003399087279098324316458608e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92402911374159755476910533154145918079e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.91224450962405933321548581824712789516e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.84063939469145970625490205194192347630e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.15300528698702940691774461674788639801e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.85553643603397817535280932672322232325e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.46207029637607033398822620480584537642e-38), + }; + BOOST_MATH_STATIC const RealType Q[9] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.54906717312241693103173902792310528801e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.84408124581401290943345932332007045483e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.81403744024723164669745491417804917709e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.23423244618880845765135047598258754409e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.84697524433421334697753031272973192290e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.94803525968789587050040294764458613062e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.68948879514200831687856703804327184420e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07366525547027105672618224029122809899e-12), + }; + // 
LCOV_EXCL_STOP
+        result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        result = 0;
+    }
+    }
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_pdf_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53> &tag) {
+    if (x >= 0) {
+        return mapairy_pdf_plus_imp_prec(x, tag);
+    }
+    else if (x <= 0) {
+        return mapairy_pdf_minus_imp_prec(x, tag);
+    }
+    else { // x is NaN: fails both comparisons above
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_pdf_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>& tag) {
+    if (x >= 0) {
+        return mapairy_pdf_plus_imp_prec(x, tag);
+    }
+    else if (x <= 0) {
+        return mapairy_pdf_minus_imp_prec(x, tag);
+    }
+    else { // x is NaN: fails both comparisons above
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_pdf_imp(const mapairy_distribution<RealType, Policy>& dist, const RealType& x) {
+    //
+    // This calculates the pdf of the Map-Airy distribution.
+    //
+
+    BOOST_MATH_STD_USING // for ADL of std functions
+    constexpr auto function = "boost::math::pdf(mapairy<%1%>&, %1%)";
+    RealType result = 0;
+    RealType location = dist.location();
+    RealType scale = dist.scale();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_x(function, x, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Map-Airy distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    RealType u = (x - location) / scale;
+
+    result = mapairy_pdf_imp_prec(u, tag_type()) / scale;
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_cdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 1) {
+        // Rational Approximation
+        // Maximum Relative Error: 2.9194e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(3.33333333333333333333e-1),
+            static_cast<RealType>(7.49532137610545010591e-2),
+            static_cast<RealType>(9.25326921848155048716e-3),
+            static_cast<RealType>(6.59133092365796208900e-3),
+            static_cast<RealType>(-5.21942678326323374113e-4),
+            static_cast<RealType>(8.22766804917461941348e-5),
+            static_cast<RealType>(-3.97941251650023182117e-6),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(8.17408156824742736411e-1),
+            static_cast<RealType>(3.57041011418415988268e-1),
+            static_cast<RealType>(1.04580353775369716002e-1),
+            static_cast<RealType>(1.87521616934129432292e-2),
+            static_cast<RealType>(2.33232161135637085535e-3),
+            static_cast<RealType>(7.31285352607895467310e-5),
+        };
+
+        result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x);
+    }
+    else if (x < 2) {
+        RealType t = x - 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.1531e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(1.84196970581015939888e-1),
+            static_cast<RealType>(-1.19398028299089933853e-3),
+            static_cast<RealType>(1.21954054797949597854e-2),
+            static_cast<RealType>(-9.37912675685073154845e-4),
+            static_cast<RealType>(1.66651954077980453212e-4),
+            static_cast<RealType>(-1.33271812303025233648e-5),
+            static_cast<RealType>(5.35982226125013888796e-7),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_pdf_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53> &tag) {
+    if (x >= 0) {
+        return mapairy_pdf_plus_imp_prec<RealType>(x, tag);
+    }
+    else if (x <= 0) {
+        return mapairy_pdf_minus_imp_prec<RealType>(x, tag);
+    }
+    else {
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_pdf_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>& tag) {
+    if (x >= 0) {
+        return mapairy_pdf_plus_imp_prec<RealType>(x, tag);
+    }
+    else if (x <= 0) {
+        return mapairy_pdf_minus_imp_prec<RealType>(x, tag);
+    }
+    else {
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_pdf_imp(const mapairy_distribution<RealType, Policy>& dist, const RealType& x) {
+    //
+    // This calculates the pdf of the Map-Airy distribution.
+    //
+
+    BOOST_MATH_STD_USING // for ADL of std functions
+    constexpr auto function = "boost::math::pdf(mapairy<%1%>&, %1%)";
+    RealType result = 0;
+    RealType location = dist.location();
+    RealType scale = dist.scale();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_x(function, x, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Map-Airy distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (i.e. 128-bit quad-floats)");
+
+    RealType u = (x - location) / scale;
+
+    result = mapairy_pdf_imp_prec(u, tag_type()) / scale;
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_cdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 1) {
+        // Rational Approximation
+        // Maximum Relative Error: 2.9194e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(3.33333333333333333333e-1),
+            static_cast<RealType>(7.49532137610545010591e-2),
+            static_cast<RealType>(9.25326921848155048716e-3),
+            static_cast<RealType>(6.59133092365796208900e-3),
+            static_cast<RealType>(-5.21942678326323374113e-4),
+            static_cast<RealType>(8.22766804917461941348e-5),
+            static_cast<RealType>(-3.97941251650023182117e-6),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(8.17408156824742736411e-1),
+            static_cast<RealType>(3.57041011418415988268e-1),
+            static_cast<RealType>(1.04580353775369716002e-1),
+            static_cast<RealType>(1.87521616934129432292e-2),
+            static_cast<RealType>(2.33232161135637085535e-3),
+            static_cast<RealType>(7.31285352607895467310e-5),
+        };
+
+        result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x);
+    }
+    else if (x < 2) {
+        RealType t = x - 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.1531e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(1.84196970581015939888e-1),
+            static_cast<RealType>(-1.19398028299089933853e-3),
+            static_cast<RealType>(1.21954054797949597854e-2),
+            static_cast<RealType>(-9.37912675685073154845e-4),
+            static_cast<RealType>(1.66651954077980453212e-4),
+            static_cast<RealType>(-1.33271812303025233648e-5),
+            static_cast<RealType>(5.35982226125013888796e-7),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(5.70352826101668448273e-1),
+            static_cast<RealType>(1.98852010141232271304e-1),
+            static_cast<RealType>(3.64864882318453496161e-2),
+            static_cast<RealType>(4.22173125405065522298e-3),
+            static_cast<RealType>(1.20079284386796600356e-4),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 4) {
+        RealType t = x - 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.8348e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(1.07409273397524124098e-1),
+            static_cast<RealType>(3.83900318969331880402e-2),
+            static_cast<RealType>(1.17926652359826576790e-2),
+            static_cast<RealType>(1.52181625871479030046e-3),
+            static_cast<RealType>(1.50703424417132565662e-4),
+            static_cast<RealType>(2.10117959279448106308e-6),
+            static_cast<RealType>(1.97360985832285866640e-8),
+            static_cast<RealType>(-1.06076300080048408251e-9),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(8.54435380513870673497e-1),
+            static_cast<RealType>(3.66021233157880878411e-1),
+            static_cast<RealType>(9.42985570806905160687e-2),
+            static_cast<RealType>(1.54122343653998564507e-2),
+            static_cast<RealType>(1.49849056258932455548e-3),
+            static_cast<RealType>(6.94290406268856211707e-5),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 8) {
+        RealType t = x - 4;
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.6624e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(4.70720199535228802538e-2),
+            static_cast<RealType>(2.67200763833749070079e-2),
+            static_cast<RealType>(7.37400551855064729769e-3),
+            static_cast<RealType>(1.10592441765001623699e-3),
+            static_cast<RealType>(9.15846028547400212588e-5),
+            static_cast<RealType>(3.17801522553862136789e-6),
+            static_cast<RealType>(2.03102753319827713542e-8),
+            static_cast<RealType>(-5.16172854149066643529e-11),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(9.05317644829451086870e-1),
+            static_cast<RealType>(3.73713496637025562492e-1),
+            static_cast<RealType>(8.94434672792094976627e-2),
+            static_cast<RealType>(1.31846542255347106087e-2),
+            static_cast<RealType>(1.16680596342421447100e-3),
+            static_cast<RealType>(5.44719256441278863300e-5),
+            static_cast<RealType>(8.73131209154185067287e-7),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 16) {
+        RealType t = x - 8;
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.6243e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(1.74847564444513000450e-2),
+            static_cast<RealType>(6.00209162595027323742e-3),
+            static_cast<RealType>(7.86550260761375576075e-4),
+            static_cast<RealType>(4.46682547335758521734e-5),
+            static_cast<RealType>(9.51329761417139273391e-7),
+            static_cast<RealType>(4.10313065114362712333e-9),
+            static_cast<RealType>(-9.81286503831545640189e-12),
+            static_cast<RealType>(2.98763969872672156104e-14),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(5.27732094554221674504e-1),
+            static_cast<RealType>(1.14330643482604301178e-1),
+            static_cast<RealType>(1.27722341942374066265e-2),
+            static_cast<RealType>(7.54563340152441778517e-4),
+            static_cast<RealType>(2.13377039814057925832e-5),
+            static_cast<RealType>(2.09670987094350618690e-7),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 32) {
+        RealType t = x - 16;
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.4684e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(6.22684103170563193015e-3),
+            static_cast<RealType>(1.34714356588780958096e-3),
+            static_cast<RealType>(9.51289465377874891896e-5),
+            static_cast<RealType>(2.64918464474843134081e-6),
+            static_cast<RealType>(2.66703857491046795285e-8),
+            static_cast<RealType>(5.42037888457985833156e-11),
+            static_cast<RealType>(-6.18017115447736427379e-14),
+            static_cast<RealType>(9.11626234402148561268e-17),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(3.09895694991285975774e-1),
+            static_cast<RealType>(3.69874670435930773471e-2),
+            static_cast<RealType>(2.15708854325146400153e-3),
+            static_cast<RealType>(6.35345408451056881884e-5),
+            static_cast<RealType>(8.65722805575670770555e-7),
+            static_cast<RealType>(4.03153189557220023202e-9),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 64) {
+        RealType t = x - 32;
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.5947e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(2.20357145727036120652e-3),
+            static_cast<RealType>(1.45412555771401325111e-4),
+            static_cast<RealType>(3.27819006009093198652e-6),
+            static_cast<RealType>(2.96786786716623870006e-8),
+            static_cast<RealType>(9.54192199129339742308e-11),
+            static_cast<RealType>(5.71421706870777687254e-14),
+            static_cast<RealType>(-1.48321866072033823195e-17),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(1.12851983233980279746e-1),
+            static_cast<RealType>(4.94650928817638043712e-3),
+            static_cast<RealType>(1.05447405092956497114e-4),
+            static_cast<RealType>(1.11578464291338271178e-6),
+            static_cast<RealType>(5.27522295397347842625e-9),
+            static_cast<RealType>(7.95786524903707645399e-12),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
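+    // Beyond x = 64 the survival function is expanded in t = x^(-3/2); the
+    // isnormal() check routes very large x through pow(sqrt(x), 3) so that
+    // x * x * x cannot overflow before the reciprocal square root is taken.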
+    else {
+        RealType x_cube = x * x * x;
+        RealType t = static_cast<RealType>((boost::math::isnormal)(x_cube) ? 1 / sqrt(x_cube) : 1 / pow(sqrt(x), 3));
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.2709e-17
+        BOOST_MATH_STATIC const RealType P[4] = {
+            static_cast<RealType>(3.98942280401432677940e-1),
+            static_cast<RealType>(2.89752186412133782995e-2),
+            static_cast<RealType>(4.67360459917040710474e0),
+            static_cast<RealType>(-1.26770824563800250704e-1),
+        };
+        BOOST_MATH_STATIC const RealType Q[3] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(7.26301023103568827709e-2),
+            static_cast<RealType>(1.60899894281099149848e1),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t;
+    }
+
+    return result;
+}
+
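+
+// Quad-precision (113-bit) counterpart of the function above: the same interval
+// scheme, with higher-degree rational approximations to reach ~1e-35 accuracy.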
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_cdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 1) {
+        // Rational Approximation
+        // Maximum Relative Error: 4.7720e-37
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.33333333333333333333333333333333333333e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.38519736580901276671338330967060054188e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.07012342772403725079487012557507575976e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70163612228825567572185033570526547856e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.16393313438726572630782132625753922397e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.92141312947853945617138019222992750592e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16513062047959961711747864068554379374e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08850391017085844154857927364247623649e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07060872491334153829857156707699441084e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.56961733740920438026573722084839596926e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.93626747947476815631021107726714283086e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32967164823609209711923411113824666288e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.23420723211833268177898025846064230665e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13807083548358335699029971528179486964e-13),
+        };
+        BOOST_MATH_STATIC const RealType Q[12] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00810772528427939684296334977783425582e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.24383652800043768524894854013745098654e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.64696616559657052516796844068580626381e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.62288747679271039067363492752820355369e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19311779292286492714550084942827207241e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.48436879303839576521077892946281025894e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.28665316157256311138787387605249076674e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.36350302380845433472593647100484547496e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.05835458213330488018147374864403662878e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.13919959493955187399856105325181806876e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30960533107704070411766556906543316310e-8),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x);
+    }
+    else if (x < 2) {
+        RealType t = x - 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.6297e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.84196970581015939887507434989936103587e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23864910443500344832158256856064580005e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.72066675347648126090497588433854314742e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.81712740200456564860442639192891089515e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.39091197181834765859741334477680768031e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.03759464781707198959689175957603165395e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15298069568149410830642785868857309358e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18910514301176322829267019223946392192e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.16851691488007921400221017970691227149e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.82031940093536875619655849638573432722e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.30042143299959913519747484877532997335e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.19848671456872291336347012756651759817e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.00479393063394570750334218362674723065e-13),
+        };
+        BOOST_MATH_STATIC const RealType Q[12] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24929390929112144560152115661603117364e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.34853762543033883106055186520573363290e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.73783624941936412984356492130276742707e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23224734370942016023173307854505597524e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.11116448823067697039703254343621931158e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.12490054037308798338231679733816982120e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.38701607014856856812627276285445001885e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10075199231657382435402462616587005087e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.43662615015322880941108094510531477066e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.37981396630189761210639158952200945512e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.55820444854396304928946970937054949160e-8),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
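+    // Each subsequent branch doubles the interval width ([2,4), [4,8), ... [32,64)),
+    // re-centring on the lower endpoint via t = x - a to keep the argument small.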
+    else if (x < 4) {
+        RealType t = x - 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.8103e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07409273397524124098315500450332255837e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.98373054365213259465522536994638631699e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.30851284606709136235419547406278197945e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92686617543233900289721448026065555990e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.18056394312484073294780140350522772329e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07058343449035366484618967963264380933e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71636108080692802684712497501670425230e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13155853034615230731719317488499751231e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.00070273388376168880473457782396672044e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35528857373910910704625837069445190727e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.99897218751541535347315078577172104436e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35092090729912631973050415647154137571e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.72220647682193638971237255396233171508e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.45008884108655511268690849420714428764e-15),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.42652074703683973183213296310906006173e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.03479786698331153607905223548719296572e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.95556520914240562719970700900964416000e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.73127917601685318803655745157828471269e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.63007065833918179119250623000791647836e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.70652732923091039268400927316918354628e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60880782675229297981880241245777122866e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.09979261868403910549978204036056659380e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.12085610111710889118562321318284539217e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59811533082647193392924345081953134304e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.37211668706684650035086116219257276925e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.62479830409039340826066305367893543134e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.22039803134898937546371285610102850458e-11),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 8) {
+        RealType t = x - 4;
+
+        // Rational Approximation
+        // Maximum Relative Error: 7.5930e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.70720199535228802537946089633331273434e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.85220706158476482443562698303252970927e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.55090221054465759649629178911450010833e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.70398047783095186291450019612979853708e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11846661331973171721224034349719801691e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.83195024406409870789088752469490824640e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.23908312140480103249294791529383548724e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.40765128885655152415228193255890859830e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.14294523267278070539100529759317560119e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.26815059429007745850376987481747435820e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.28142945635159623618312928455133399240e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.77079683180868753715374495747422819326e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.73710011278079325323578951018770847628e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.70140037580287364298206334732060874507e-16),
+        };
+        BOOST_MATH_STATIC const RealType Q[16] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36848014038411798213992770858203510748e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.15373052017549822413011375404872359177e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.92705544967513282963463451395766172671e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19899290805598090502434290420047460406e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74002906913724742582773116667380578990e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.80632456977494447641985312297971970632e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.53381530665983535467406445749348183915e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.86606180756179817016240556949228031340e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.49594666942152749850479792402078560469e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.25231012522695972983928740617341887334e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34987086926725472733984045599487947378e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.58286136970918021841189712851698747417e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.12238357666199366902936267515573231037e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.82464168044335183356132979380360583444e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.40073718480172265670072434562833527076e-17),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 16) {
+        RealType t = x - 8;
+
+        // Rational Approximation
+        // Maximum Relative Error: 7.3609e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[16] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74847564444513000450056174922427854591e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.56842503159303803254436983444304764079e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.48504629497687889354406208309334148575e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62327083507366120871877936416427790391e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72062210557023828776202679230979309963e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.19153025667221102770398900522196418041e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.66248482185063262034022017727375829162e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57390218395059632327421809878050974588e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.45520328522839835737631604118833792570e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76327978880339919462910339138428389322e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.99700625463451418394515481232159889297e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.82943476668680389338853032002472541164e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19415284760817575957617090798914089413e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17080879333540200065368097274334363537e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.09912208206107606750610288716869139753e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.98451733054622166748935243139556132704e-26),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08148065380582488495702136465010348576e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.42385352331252779422725444021027377277e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66510412535270623169792008730183916611e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.47952712144801508762945315513819636452e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.20703092334999244212988997416711617790e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.71658889250345012472529115544710926154e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.63905601023452497974798277091285373919e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.76730409484335386334980429532443217982e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19139408077753398896224794522985050607e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.58025872548387600940275201648443410419e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.11369336267349152895272975096509109414e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.56182954522937999103610817174373785571e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.35452907177197742692545044913125982311e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23587924912460218189929226092439805175e-17),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 32) {
+        RealType t = x - 16;
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.7192e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.22684103170563193014558918295924551173e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.55222688816852408105912768186300290291e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.60747505331765587662432023547517953629e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.80463770266821887100086895337451846880e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.19190824169154471000496746227725070963e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40646301571395681364881852739555404287e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.15408836734496798091749932018121879724e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.13676779930022341958128426888835497781e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02435098103190516418351075792372986932e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.82018920071479061978244972592746216377e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.26435061215428679536159320644587957335e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.05298407883178633891153989998582851270e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.61156860101928352010449210760843428372e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.02156808288545876198121127510075217184e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65549196385656698597261688277898043367e-30),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.03426141030409708635168766288764563749e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13808987755928828118915442251025992769e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52253239792170999949444502938290297674e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33720936468171204432499390745432338841e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.08713980159883984886576124842631646880e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.43652846144339754840998823540656399165e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02849693617024492825330133490278326951e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.14110017452008167954262319462808192536e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.01462578814695350559338360744897649915e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.73495817568046489613308117490508832084e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47445372925844096612021093857581987132e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.08200002287534174751275097848899176785e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.15305756373406702253187385797525419287e-21),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 64) {
+        RealType t = x - 32;
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.7799e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.20357145727036120652264700679701054983e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.95712324967981162396595365933255312698e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.08619492652809635942960438372427086939e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37140224583881547818087260161723208444e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.83073777522092069988595553041062506001e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.00473542739040742110568810201412321512e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.47447289601822506789553624164171452120e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.70913574957198131397471307249294758738e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.36538119628489354953085829178695645929e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00763343664814257170332492241110173166e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62297585950798764290583627210836077239e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15780217054514513493147192853488153246e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.31961589164397397724611386366339562789e-28),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.26440207646105117747875545474828367516e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.27872879091838733280518786463281413334e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34256572873114675776148923422025029494e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.13595637397535037957995766856628205747e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33745879863685053883024090247009549434e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41792226523670940279016788831933559977e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.03966147662273388060545199475024100492e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.62177951640260313354050335795080248910e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50650165210517365082118441264513277196e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.48413283257020741389298806290302772976e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16439276222123152748426700489921412654e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24969602890963356175782126478237865639e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.08681155203261739689727004641345513984e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.28282024196484688479115133027874255367e-30),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        RealType x_cube = x * x * x;
+        RealType t = (boost::math::isnormal)(x_cube) ? 1 / sqrt(x_cube) : 1 / pow(sqrt(x), 3);
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.5865e-37
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[8] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.98942280401432677939946059934381868476e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.12426566605292130233061857505057433291e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.91574528280329492283287073127040983832e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.69914217884224943794012165979483573091e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30178902028403564086640591437738216288e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96515490341559353794378324810127583810e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.44343825578434751356083230369361399507e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.07224810408790092272497403739984510394e2),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.32474438135610721926278423612948794250e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27594461167587027771303292526448542806e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.49207539478843628626934249487055017677e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.75094412095634602055738687636893575929e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.51642534474780515366628648516673270623e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.05977615003758056284424652420774587813e4),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t;
+    }
+
+    return result;
+}
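+
+// Left tail, P(X <= x) for x <= 0, at double precision. Down to x = -2 plain
+// rational approximations in x suffice; further out the exponentially small
+// factor s(x) is peeled off first (see the comment inside the else branch).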
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_cdf_minus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x >= -1) {
+        RealType t = x + 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.6964e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(4.23238998449671083670e-1),
+            static_cast<RealType>(4.95353582976475183891e-1),
+            static_cast<RealType>(2.45823281826037784270e-1),
+            static_cast<RealType>(7.29726507468813920788e-2),
+            static_cast<RealType>(1.63332856186819713346e-2),
+            static_cast<RealType>(2.82514634871307516142e-3),
+            static_cast<RealType>(2.66220579589280704089e-4),
+            static_cast<RealType>(3.09442180091323751049e-6),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(5.16241922223786900600e-1),
+            static_cast<RealType>(2.75690727171711638879e-1),
+            static_cast<RealType>(7.18707184893542884080e-2),
+            static_cast<RealType>(1.87136800286819336797e-2),
+            static_cast<RealType>(2.38383441176345054929e-3),
+            static_cast<RealType>(3.23509126477812051983e-4),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x >= -2) {
+        RealType t = x + 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.8303e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(1.62598955251978523175e-1),
+            static_cast<RealType>(2.30154661502402196205e-1),
+            static_cast<RealType>(1.29233975368291684522e-1),
+            static_cast<RealType>(3.80919553916980965587e-2),
+            static_cast<RealType>(8.17724414618808505948e-3),
+            static_cast<RealType>(1.95816800210481122544e-3),
+            static_cast<RealType>(3.35259917978421935141e-4),
+            static_cast<RealType>(1.22071311320012805777e-5),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(9.63771793313770952352e-2),
+            static_cast<RealType>(2.23602260938227310054e-1),
+            static_cast<RealType>(9.21944797677283179038e-3),
+            static_cast<RealType>(1.82181136341939651516e-2),
+            static_cast<RealType>(1.11216849284965970458e-4),
+            static_cast<RealType>(5.57446347676836375810e-4),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
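+        // Factor out the exponential decay of the left tail: the CDF is written as
+        // s(x) * R(t) with s(x) = exp(2x^3/27) / sqrt(-x^3), so the rational part R
+        // only has to model a slowly varying correction.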
+        RealType s = exp(2 * x * x * x / 27) / sqrt(-x * x * x);
+
+        if (x >= -4) {
+            RealType t = -x - 2;
+
+            // Rational Approximation
+            // Maximum Relative Error: 3.6017e-18
+            BOOST_MATH_STATIC const RealType P[8] = {
+                static_cast<RealType>(8.31806744221966404520e-1),
+                static_cast<RealType>(1.34481067378012055850e0),
+                static_cast<RealType>(9.12139427469494995264e-1),
+                static_cast<RealType>(3.59706159222491124928e-1),
+                static_cast<RealType>(9.48836332725688279299e-2),
+                static_cast<RealType>(1.68259594978853951234e-2),
+                static_cast<RealType>(1.89700733471520162946e-3),
+                static_cast<RealType>(1.13854052826846329787e-4),
+            };
+            BOOST_MATH_STATIC const RealType Q[8] = {
+                static_cast<RealType>(1.),
+                static_cast<RealType>(1.29694286517571741097e0),
+                static_cast<RealType>(7.99686735441213882518e-1),
+                static_cast<RealType>(3.08198207583883597188e-1),
+                static_cast<RealType>(7.97230139795658588972e-2),
+                static_cast<RealType>(1.40742142048849462162e-2),
+                static_cast<RealType>(1.58411440546277691506e-3),
+                static_cast<RealType>(9.51560785730564046338e-5),
+            };
+
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else if (x >= -8) {
+            RealType t = -x - 4;
+
+            // Rational Approximation
+            // Maximum Relative Error: 1.3504e-17
+            BOOST_MATH_STATIC const RealType P[8] = {
+                static_cast<RealType>(1.10294551528734705946e0),
+                static_cast<RealType>(1.26696377028973554615e0),
+                static_cast<RealType>(6.63115985833429688941e-1),
+                static_cast<RealType>(2.06289793717379095832e-1),
+                static_cast<RealType>(4.11977615717846276227e-2),
+                static_cast<RealType>(5.28620928618550859827e-3),
+                static_cast<RealType>(4.04328442334023561279e-4),
+                static_cast<RealType>(1.42364413902075896503e-5),
+            };
+            BOOST_MATH_STATIC const RealType Q[8] = {
+                static_cast<RealType>(1.),
+                static_cast<RealType>(1.09709853682665798542e0),
+                static_cast<RealType>(5.63687797989627787500e-1),
+                static_cast<RealType>(1.73604358560002859604e-1),
+                static_cast<RealType>(3.44985744385890794044e-2),
+                static_cast<RealType>(4.41683993064797272821e-3),
+                static_cast<RealType>(3.37834206192286709492e-4),
+                static_cast<RealType>(1.18951465786445720729e-5),
+            };
+
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else if (x >= -16) {
+            RealType t = -x - 8;
+
+            // Rational Approximation
+            // Maximum Relative Error: 8.8272e-18
+            BOOST_MATH_STATIC const RealType P[7] = {
+                static_cast<RealType>(1.18246847255744057280e0),
+                static_cast<RealType>(8.41320657699741240497e-1),
+                static_cast<RealType>(2.55093097377551881478e-1),
+                static_cast<RealType>(4.21261576802732715976e-2),
+                static_cast<RealType>(3.98805044659990523312e-3),
+                static_cast<RealType>(2.04688276265993954527e-4),
+                static_cast<RealType>(4.43354791268634655473e-6),
+            };
+            BOOST_MATH_STATIC const RealType Q[7] = {
+                static_cast<RealType>(1.),
+                static_cast<RealType>(7.07103973315808077783e-1),
+                static_cast<RealType>(2.13664682181055450396e-1),
+                static_cast<RealType>(3.52218225168465984709e-2),
+                static_cast<RealType>(3.33218664347896435919e-3),
+                static_cast<RealType>(1.71025807471868853268e-4),
+                static_cast<RealType>(3.70441884597642042665e-6),
+            };
+
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else if (x >= -32) {
+            RealType t = -x - 16;
+
+            // Rational Approximation
+            // Maximum Relative Error: 2.6236e-18
+            BOOST_MATH_STATIC const RealType P[6] = {
+                static_cast<RealType>(1.19497306481411168356e0),
+                static_cast<RealType>(3.90497195765498241356e-1),
+                static_cast<RealType>(5.13120330037626853257e-2),
+                static_cast<RealType>(3.38574023921119491471e-3),
+                static_cast<RealType>(1.12075935888344736993e-4),
+                static_cast<RealType>(1.48743616420183584738e-6),
+            };
+            BOOST_MATH_STATIC const RealType Q[6] = {
+                static_cast<RealType>(1.),
+                static_cast<RealType>(3.26493785348088598123e-1),
+                static_cast<RealType>(4.28813205161574223713e-2),
+                static_cast<RealType>(2.82893073845390254969e-3),
+                static_cast<RealType>(9.36442365966638579335e-5),
+                static_cast<RealType>(1.24281651532469125315e-6),
+            };
+
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else {
+            result = 0;
+        }
+    }
+
+    return result;
+}
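+
+// Quad-precision (113-bit) variant of the left-tail CDF; the tail is carried one
+// block further (to x = -64) before s(x) underflows and the result is taken as 0.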
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_cdf_minus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x >= -1) {
+        RealType t = x + 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.0688e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.23238998449671083670041452413316011920e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.14900991369455846775267187236501987891e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.19132787054572299485638029221977944555e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.87295743700300806662745209398368996653e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.41994520703802035725356673887766112213e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78782099629586443747968633412271291734e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.05200546520666366552864974572901349343e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.51453477916196630939702866688348310208e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15461354910584918402088506199099270742e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43371674256124419899137414410592359185e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35849788347057186916350200082990102088e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.50359296597872967493549820191745700442e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.21838020977580479741299141050400953125e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.46723648594704078875476888175530463986e-12),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.98700317671474659677458220091101276158e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.00405631175818416028878082789095587658e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.04189939150805562128632256692765842568e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.03621065280443734565418469521814125946e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85722257874304617269018116436650330070e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.24191409213079401989695901900760076094e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.64269032641964601932953114106294883156e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19289631274036494326058240677240511431e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41389309719775603006897751176159931569e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.42000309062533491486426399210996541477e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.02436961569668743353755318268149636644e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.50130875023154569442119099173406269991e-9),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x >= -2) {
+        RealType t = x + 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.1815e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62598955251978523174755901843430986522e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.08127698872954954678270473317137288772e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.70144997468767751317246482211703706086e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.49486603823046766249106014234315835102e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.07186495389828596786579668258622667573e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.98334953533562948674335281457057445421e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.44119017374895211020429143034854620303e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27080759819117162456137826659721634882e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.53892796920597912362370019918933112349e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.30530442651657077016130554430933607143e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.04837779538527662990102489150650534390e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.94354615171320374997141684442120888127e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.30746545799073289786965697800049892311e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.41870129065056783732691371215602982173e-9),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.75919235734607601884356783586727272494e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.57656678936617227532275100649989944452e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.72617552401870454676736869003112018648e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.59238104942208254162102314312757621047e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06040513359343987972917295603514840777e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.26922840063034349024167652148593396307e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.25628506630180107357627955876231943531e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.81600387497542714853225329159728694926e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.08210973846891324886779444820838563800e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.68632477858150229792523037059221563861e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.43542789104866782087701759971538600076e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.70594730517167328271953424328890849790e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.30162314557860623869079601905904538470e-9),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        RealType s = exp(2 * x * x * x / 27) / sqrt(-x * x * x);
+
+        if (x >= -4) {
+            RealType t = -x - 2;
+
+            // Rational Approximation
+            // Maximum Relative Error: 6.4678e-36
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[16] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.31806744221966404520449104514474066823e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.50292887071777664663197915067642779665e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.45140067157601150721516139901304901854e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.93227973605511286112712730820664209900e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74259108933048973391560053531348126900e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.77677252890665602191818487592154553094e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.71843197238558832510595724454548089268e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.62811778285151415483649897138119310816e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74127763877120261698596916683136227034e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.24832552591462216226478550702845438540e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.93381036027487259940171548523889481080e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.02261328789519398745019578211081412570e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.75409238451885381267277435341417474231e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.09526311389365895099871581844304449319e-7),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96371756262605118060185816854433322493e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.88472819535099746216179119978362211227e-10),
+            };
+            BOOST_MATH_STATIC const RealType Q[16] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.68923525157720774962908922391133419863e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.40714902096062779527207435671907059131e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.73120596883364361220343183559076165363e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.56331267512666685349409906638266569733e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.80956276267438042306216894159447642323e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.34213468750936211385520570062547991332e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.50081600968590616549654807511166706919e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47297208240850928379158677132220746750e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.73392976579560287571141938466202325901e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.13858821123741335782695407397784840241e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.04103497389656828224053882850778186433e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.81040189127998139689091455192659191796e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.42176283104790992634826374270801565123e-7),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.64077137545614380065714904794220228239e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.08139724991616322332901357866680220241e-10),
+            };
+            // LCOV_EXCL_STOP
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else if (x >= -8) {
+            RealType t = -x - 4;
+
+            // Rational Approximation
+            // Maximum Relative Error: 3.5975e-36
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[16] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10294551528734705945662709421382590676e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.26135857114883288617323671166863478751e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.23504465936865651893193560109437792738e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41598983788870071301270649341678962009e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.43871304031224174103636568402522086316e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22745720458050596514499383658714367529e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.05976431838299244997805790000128175545e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32087500190238014890030606301748111874e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.32754445514451500968404092049839985196e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31866016448921762610690552586049011375e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.80197257671079297305525087998125408939e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.44212088947602969374978384512149432847e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.38857170416924025226203571589937286465e-7),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20239999218390467567339789443070294182e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.93965060142992479149039624149602039394e-10),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36407983918582149239548869529460234702e-12),
+            };
+            BOOST_MATH_STATIC const RealType Q[16] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99867960957804580209868321228347067213e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.94236623527818880544030470097296139679e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21644866845440678050425616384656052588e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.48653919287388803523090727546630060490e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88696531788490258477870877792341909659e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.11157356307921406032115084386689196255e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11071149696069503480091810333521267753e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.95274844731679437274609760294652465905e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.77971280158253322431071249000491659536e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.18092258028773913076132483326275839950e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.87773088535057996947643657676843842076e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99609277781492599950063871899582711550e-7),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00465660598924300723542908245498229301e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.29174652982710100418474261697035968379e-10),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.31746082236506935340972706820707017875e-12),
+            };
+            // LCOV_EXCL_STOP
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else if (x >= -16) {
+            RealType t = -x - 8;
+
+            // Rational Approximation
+            // Maximum Relative Error: 2.6792e-35
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[15] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18246847255744057280356900905660312795e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.77955886026107125189834586992142580148e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24948425302263641813107623611637262126e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.42593662659560333324287312162818766556e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62714138002904073145045478360748042164e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.56008984285541289474850396553042124777e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.84858048549330525583286373950733005244e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.30578460156038467943968005946143934751e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.93974868941529258700281962314167648967e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.95086664204515648622431580749060079100e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.57811968176644056830002158465591081929e-7),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.27814751838906948007289825582251221538e-9),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06762893426725920159998333647896590440e-10),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.15388861641344998301210173677051088515e-12),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.83956842740198388242245209024484381888e-29),
+            };
+            BOOST_MATH_STATIC const RealType Q[14] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50056124032615852703112365430040751173e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05112559537845833793684655693572118348e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.55609497026127521043140534271852131858e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36430652394614121156238070755223942728e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.98167215021940993097697777547641188697e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.89418831310297071347013983522734394061e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.10980717618462843498917526227524790487e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.80119618735773019675212434416594954984e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.13748676086657187580746476165248613583e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.15424395860921826755718081823964568760e-7),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.75228896859720124469916340725146705309e-9),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.72759238269315282789451836388878919387e-10),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.79966603543593799412565926418879689461e-12),
+            };
+            // LCOV_EXCL_STOP
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else if (x >= -32) {
+            RealType t = -x - 16;
+
+            // Rational Approximation
+            // Maximum Relative Error: 2.1744e-36
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[12] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19497306481411168355692832231058399132e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.15593166681833539521403250736661720488e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.54020260738207743315755235213180652303e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.76467972857585566189917087631621063058e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.97922795572348613358915532172847895070e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.26998967192207380100354278434037095729e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.32827180395699855050424881575240362199e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50587178182571637802022891868380669565e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.78252548290929962236994183546354358888e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01519297007773622283120166415145520855e-9),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29602226691665918537895803270497291716e-11),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.53666531487585211574942518181922132884e-14),
+            };
+            BOOST_MATH_STATIC const RealType Q[12] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.82230649578130958108098853863277631065e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.12412482738973738235656376802445565005e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.98320116955422615960870363549721494683e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.99756654189000467678223166815845628725e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.40414942475279981724792023159180203408e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78118445466942812088955228016254912391e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.25827002637577602812624580692342616301e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.99604467789028963216078448884632489822e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.48237134334492289420105516726562561260e-10),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08288201960155447241423587030002372229e-11),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.29720612489952110448407063201146274502e-14),
+            };
+            // LCOV_EXCL_STOP
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else if (x >= -64) {
+            RealType t = -x - 32;
+
+            // Rational Approximation
+            // Maximum Relative Error: 3.4699e-36
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[10] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19659414007358083585943280640656311534e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.36969140730640253987817932335415532846e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.21946928005759888612066397569236165853e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.08341720579009422518863704766395201498e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44908614491286780138818989614277172709e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.54172482866925057749338312942859761961e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.49281630950104861570255344237175124548e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27586759709416364899010676712546639820e-9),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00054716479138657682306851175059678989e-11),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.48798342894235412426464893852098239746e-14),
+            };
+            BOOST_MATH_STATIC const RealType Q[10] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.81588658109851219975949691772676519853e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.52583331848892383968186924120872369151e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.57644670426430994363913234422346706991e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21080164634428298820141591419770346977e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.79484074949580980980061103238709314326e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.93167250146504946763386377338487557826e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06604193118724797924138056151582242604e-9),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.35999937789324222934257460080153249173e-12),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.91435929481043135336094426837156247599e-14),
+            };
+            // LCOV_EXCL_STOP
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else {
+            result = 0;
+        }
+    }
+
+    return result;
+}
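+
+// CDF dispatch: each half-line is evaluated by the tail expansion that is natural
+// there, and the opposite case is formed as 1 minus it, keeping the subtraction
+// away from the far tails where it would cancel catastrophically.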
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_cdf_imp_prec(const RealType& x, bool complement, const boost::math::integral_constant<int, 53>& tag) {
+    if (x >= 0) {
+        return complement ? mapairy_cdf_plus_imp_prec<RealType>(x, tag) : 1 - mapairy_cdf_plus_imp_prec<RealType>(x, tag);
+    }
+    else if (x <= 0) {
+        return complement ? 1 - mapairy_cdf_minus_imp_prec<RealType>(x, tag) : mapairy_cdf_minus_imp_prec<RealType>(x, tag);
+    }
+    else {
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_cdf_imp_prec(const RealType& x, bool complement, const boost::math::integral_constant<int, 113>& tag) {
+    if (x >= 0) {
+        return complement ? mapairy_cdf_plus_imp_prec<RealType>(x, tag) : 1 - mapairy_cdf_plus_imp_prec<RealType>(x, tag);
+    }
+    else if (x <= 0) {
+        return complement ? 1 - mapairy_cdf_minus_imp_prec<RealType>(x, tag) : mapairy_cdf_minus_imp_prec<RealType>(x, tag);
+    }
+    else {
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_cdf_imp(const mapairy_distribution<RealType, Policy>& dist, const RealType& x, bool complement) {
+    //
+    // This calculates the cdf of the Map-Airy distribution and/or its complement.
+    //
+
+    BOOST_MATH_STD_USING // for ADL of std functions
+    constexpr auto function = "boost::math::cdf(mapairy<%1%>&, %1%)";
+    RealType result = 0;
+    RealType location = dist.location();
+    RealType scale = dist.scale();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_x(function, x, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Map-Airy distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (i.e. 128-bit quad-floats)");
+
+    RealType u = (x - location) / scale;
+
+    result = mapairy_cdf_imp_prec(u, complement, tag_type());
+
+    return result;
+}
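+
+// Lower-tail quantile at double precision. Moderate p uses rational fits on
+// [0.125, 0.5); for small p the argument is reduced dyadically, branching on
+// ilogb(p) and mapping each block onto a bounded t via -log2(ldexp(p, k)).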
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_quantile_lower_imp_prec(const RealType& p, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (p >= 0.375) {
+        RealType t = p - static_cast<RealType>(0.375);
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.5488e-18
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(-1.17326074020471664075e0),
+            static_cast<RealType>(1.51461298154568349598e0),
+            static_cast<RealType>(1.19979368094343490487e1),
+            static_cast<RealType>(-5.94882121521324108164e0),
+            static_cast<RealType>(-2.20619749774447254528e1),
+            static_cast<RealType>(7.17766543775229176131e0),
+            static_cast<RealType>(4.79284243496552841508e0),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(1.76268072706610602584e0),
+            static_cast<RealType>(-4.88492535243404839734e0),
+            static_cast<RealType>(-5.67524172432687656881e0),
+            static_cast<RealType>(6.83327389947131710596e0),
+            static_cast<RealType>(2.91338085774159042709e0),
+            static_cast<RealType>(-1.41108918944159283950e0),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.25) {
+        RealType t = p - static_cast<RealType>(0.25);
+
+        // Rational Approximation
+        // Maximum Relative Error: 7.5181e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(-1.63281240925531302762e0),
+            static_cast<RealType>(-4.92351310795930780147e0),
+            static_cast<RealType>(1.43448529253101759409e1),
+            static_cast<RealType>(3.33182629948094299473e1),
+            static_cast<RealType>(-3.06679026539368582747e1),
+            static_cast<RealType>(-2.87298447423841965301e1),
+            static_cast<RealType>(1.31575930750093554120e1),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(5.38761652244702318296e0),
+            static_cast<RealType>(2.40932080746189543284e0),
+            static_cast<RealType>(-1.69465870062123632126e1),
+            static_cast<RealType>(-6.39998944283654848809e0),
+            static_cast<RealType>(1.27168434054332272391e1),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.125) {
+        RealType t = p - static_cast<RealType>(0.125);
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.3028e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-2.18765177572396469657e0),
+            static_cast<RealType>(-3.65752788934974426531e1),
+            static_cast<RealType>(-1.81144810822028903904e2),
+            static_cast<RealType>(-1.22434531262312950288e2),
+            static_cast<RealType>(8.99451018491165823831e2),
+            static_cast<RealType>(9.11333307522308410858e2),
+            static_cast<RealType>(-8.76285742384616909177e2),
+            static_cast<RealType>(-2.33786726970025938837e2),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(1.91797638291395345792e1),
+            static_cast<RealType>(1.24293724082506952768e2),
+            static_cast<RealType>(2.82393116012902543276e2),
+            static_cast<RealType>(-1.80472369158936285558e1),
+            static_cast<RealType>(-5.31764390192922827093e2),
+            static_cast<RealType>(-5.60586018315854885788e1),
+            static_cast<RealType>(1.21284324755968033098e2),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
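+    // For p < 1/8 each branch handles one dyadic block of p: ldexp(p, k) rescales
+    // the block so that t = -log2(ldexp(p, k)) stays bounded, and the blocks widen
+    // geometrically (ilogb thresholds -4, -8, -16, ..., -1024).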
static_cast(1.), + static_cast(1.77270673840643360017e-1), + static_cast(1.18099604045834575786e-2), + static_cast(3.66889581757166584963e-4), + static_cast(5.34484782554469770841e-6), + static_cast(3.19694601727035291809e-8), + static_cast(5.24649233511937214948e-11), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (ilogb(p) >= -64) { + RealType t = -log2(ldexp(p, 32)); + + // Rational Approximation + // Maximum Relative Error: 2.7742e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(-6.41443550638291133784e0), + static_cast(-6.38369359780748328332e-1), + static_cast(-2.43420704406734621618e-2), + static_cast(-4.45274771094277987075e-4), + static_cast(-3.99529078051262843241e-6), + static_cast(-1.59758677464731620413e-8), + static_cast(-2.14338367751477432622e-11), + static_cast(-3.23343844538964435927e-15), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(8.79845511272943785289e-2), + static_cast(2.90839059356197474893e-3), + static_cast(4.48172838083912540123e-5), + static_cast(3.23770691025690100895e-7), + static_cast(9.60156044379859908674e-10), + static_cast(7.81134095049301988435e-13), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (ilogb(p) >= -128) { + RealType t = -log2(ldexp(p, 64)); + + // Rational Approximation + // Maximum Relative Error: 3.2451e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(-8.23500806363233610938e0), + static_cast(-4.05652655284908839003e-1), + static_cast(-7.65978833819859622912e-3), + static_cast(-6.94194676058731901672e-5), + static_cast(-3.08771646223818451436e-7), + static_cast(-6.12443207313641110962e-10), + static_cast(-4.07882839359528825925e-13), + static_cast(-3.05720104049292610799e-17), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(4.37395212065018405474e-2), + static_cast(7.18654254114820140590e-4), + static_cast(5.50371158026951899491e-6), + static_cast(1.97583864365011234715e-8), + static_cast(2.91169706068202431036e-11), + static_cast(1.17716830382540977039e-14), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (ilogb(p) >= -256) { + RealType t = -log2(ldexp(p, 128)); + + // Rational Approximation + // Maximum Relative Error: 3.8732e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(-1.04845570631944023913e1), + static_cast(-2.56502856165700644836e-1), + static_cast(-2.40615394566347412600e-3), + static_cast(-1.08364601171893250764e-5), + static_cast(-2.39603255140022514289e-8), + static_cast(-2.36344017673944676435e-11), + static_cast(-7.83146284114485675414e-15), + static_cast(-2.92218240202835807955e-19), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(2.17740414929742679904e-2), + static_cast(1.78084231709097280884e-4), + static_cast(6.78870668961146609668e-7), + static_cast(1.21313439060489363960e-9), + static_cast(8.89917934953781122884e-13), + static_cast(1.79115540847944524599e-16), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (ilogb(p) >= -512) { + RealType t = -log2(ldexp(p, 256)); + + // Rational Approximation + // Maximum Relative Error: 4.6946e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(-1.32865827226175698181e1), + static_cast(-1.61802434199627472010e-1), + static_cast(-7.55642602577784211259e-4), + static_cast(-1.69457608092375302291e-6), + 
static_cast(-1.86612389867293722402e-9), + static_cast(-9.17015770142364635163e-13), + static_cast(-1.51422473889348610974e-16), + static_cast(-2.81661279271583206526e-21), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.08518414679241420227e-2), + static_cast(4.42335224797004486239e-5), + static_cast(8.40387821972524402121e-8), + static_cast(7.48486746424527560620e-11), + static_cast(2.73676810622938942041e-14), + static_cast(2.74588200481263214866e-18), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (ilogb(p) >= -1024) { + RealType t = -log2(ldexp(p, 512)); + + // Rational Approximation + // Maximum Relative Error: 5.7586e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(-1.67937186583822375593e1), + static_cast(-1.01958138247797604098e-1), + static_cast(-2.37409774265951876695e-4), + static_cast(-2.65483321307104128810e-7), + static_cast(-1.45803536947907216594e-10), + static_cast(-3.57375116523338994342e-14), + static_cast(-2.94401318006358820268e-18), + static_cast(-2.73260616170245224789e-23), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(5.41357843707822974161e-3), + static_cast(1.10082540037527566536e-5), + static_cast(1.04338126042963003178e-8), + static_cast(4.63619608458569600346e-12), + static_cast(8.45781310395535984099e-16), + static_cast(4.23432554226506409568e-20), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else { + result = -boost::math::numeric_limits::infinity(); + } + + return result; +} + + +template +BOOST_MATH_GPU_ENABLED inline RealType mapairy_quantile_lower_imp_prec(const RealType& p, const boost::math::integral_constant&) +{ + BOOST_MATH_STD_USING + RealType result; + + if (p >= 0.4375) { + RealType t = p - static_cast (0.4375); + + // Rational Approximation + // Maximum Relative Error: 4.2901e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[10] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.48344198262277235851026749871350753173e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.18249834490570496537675012473572546187e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20191368895639224466285643454767208281e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.88388953372157636908236843798588258539e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.59477796311326067051769635858472572709e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.88799146700484120781026039104654730797e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.15708831983930955608517858269193800412e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.01389336086567891484877690859385409842e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.16683694881010716925933071465043323946e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.04356966421177683585461937085598186805e1), + }; + BOOST_MATH_STATIC const RealType Q[11] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.75444066345435020043849341970820565274e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.95105673975812427406540024601734210826e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.20381124524894051002242766595737443257e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.48370658634610329590305283520183480026e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.52213602242009530270284305006282822794e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.91028722773916006242187843372209197705e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
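+// Editor's note, not part of the original patch: every tail branch above
+// substitutes t = -log2(ldexp(p, k)) = -(log2(p) + k), mapping the interval
+// 2^-2k <= p < 2^-k onto a bounded range of roughly [0, k] on which the
+// rational approximation is fitted. For example, p = 2^-12 falls in the
+// ilogb(p) >= -16 branch (k = 8) and is evaluated at t = 4.
+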
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_quantile_lower_imp_prec(const RealType& p, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (p >= 0.4375) {
+        RealType t = p - static_cast<RealType>(0.4375);
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.2901e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[10] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.48344198262277235851026749871350753173e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.18249834490570496537675012473572546187e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20191368895639224466285643454767208281e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.88388953372157636908236843798588258539e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.59477796311326067051769635858472572709e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.88799146700484120781026039104654730797e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.15708831983930955608517858269193800412e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.01389336086567891484877690859385409842e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.16683694881010716925933071465043323946e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.04356966421177683585461937085598186805e1),
+        };
+        BOOST_MATH_STATIC const RealType Q[11] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.75444066345435020043849341970820565274e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.95105673975812427406540024601734210826e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.20381124524894051002242766595737443257e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.48370658634610329590305283520183480026e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.52213602242009530270284305006282822794e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.91028722773916006242187843372209197705e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.76130245344411748356977700519731978720e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30834721900169773543149860814908904224e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.37863084758381651884340710544840951679e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.46880981703613838666108664771931239970e0),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.375) {
+        RealType t = p - static_cast<RealType>(0.375);
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.8433e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[11] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.17326074020471664204142312429732771661e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.23412560010002723970559712941124583385e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83665111310407767293290698145068379130e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.38459476870110655357485107373883403534e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.28751995328228442619291346921055105808e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.31663592034507247231393516167247241037e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13629333446941271397790762651183997586e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.80674058829101054663235662701823250421e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.53226182094253065852552393446365315319e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.14713948941614711932063053969010219677e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.62979741122708118776725634304028246971e0),
+        };
+        BOOST_MATH_STATIC const RealType Q[10] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.10550060286464202595779024353437346419e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.15893254630199957990897452211066782021e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.58964066823516762861256609311733069353e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.73352515261971291505497909338586980605e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.64737859211974163695241658186141083513e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.79137714768236053008878088337762178011e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.71851514659301019977259792564627124877e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.37210093190088984630526671624779422232e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06793750951779308425209267821815264457e1),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.25) {
+        RealType t = p - static_cast<RealType>(0.25);
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.9072e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.63281240925531315038207673147576291783e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.72733898245766165408685147762489513406e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.48666841594842113608962500631836790675e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.38711336213357101067420572773139678571e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.19536066931882831915715343914510496760e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70911330354860558400876197129777829223e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.46138758869321272507090399082047865434e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.42653825421465476333482312795245170700e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.68040069633027903153088221686431049116e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.63017854949929226947577854802720988740e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.57362168966659376351959631576588023516e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.48386631313725080746815524770260451090e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.03293129698111279047104766073456412318e1),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29511778027351594854005887702013466376e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.66155745848864270109281703659789474448e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.25628362783798417463294553777015370203e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.93162726153946899828589402901015679821e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.51582398149308841534372162372276623400e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.55512400116480727630652657714109740448e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.11949742749256615588470329024257669470e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.28090154738508864776480712360731968283e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.44307717248171941824014971579691790721e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.33595130666758203099507440236958725924e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.76156378002782668186592725145930755636e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.70446647862725980215630194019740606935e0),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.125) {
+        RealType t = p - static_cast<RealType>(0.125);
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.9092e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.18765177572396470161180571018467019660e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.16991481696416567893311341049825218287e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.25497491118598918048058751362064598010e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.00259915194411316966036757165146681474e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.20350803730836873687692010728689867756e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.75441278117456011071671644613599089820e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18844967505497310645091822621081741562e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.84771867850847121528386231811667556346e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.78112436422992766542256241612018834150e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.82617957794395420193751983521804760378e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.77260227244465268981198741998181334875e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.61918290776044518321561351472048170874e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.10309368217936941851272359946015001037e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.68917274690585744147547352309416731690e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.96914697182030973966321601422851730384e4),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.98063872144867195074924232601423646991e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.65911346382127464683324945513128779971e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.02451223307009464199634546540152067898e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.12662676019712475980273334769644047369e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.77637305574675655673572303462430608857e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.65900204382557635710258095712789133767e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.61649498173261886264315880770449636676e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.29867325788870863753779283018061152414e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.31375646045453788071216808289409712455e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.91053361331987954531162452163243245571e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30917504462260061766689326034981496723e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.95779171217851232246427282884386844906e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.07234815204245866330282860014624832711e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21353269292094971546479026200435095695e4),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -4) {
+        RealType t = -log2(ldexp(p, 3));
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.9653e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[11] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.18765177572396470161180571018467025793e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.94718878144788678915739777385667044494e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.01760622104142726407095836139719210570e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.27585388152893587017559610649258643106e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.26494992809545184138230791849722703452e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.19962820888633928632710264415572027960e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.10249328404135065767844288193594496173e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.94733898567966142295343935527193851633e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.10350856810280579594619259121755788797e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.23852908701349250480831167491889740823e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.61008160195204632725691076288641221707e-10),
+        };
+        BOOST_MATH_STATIC const RealType Q[11] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59109221235949005113322202980300291082e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07119192591092503378838510797916225920e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.97313678065269932508447079892684333156e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.85743007214453288049750256975889151838e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21504290861269099964963866156493713716e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00880286431998922077891394903879512720e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.78806057460269900288838437267359072282e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15390284861815831078443996558014864171e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09877166004503937701692216421704042881e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.01544823753120969225271131241177003165e-11),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -8) {
+        RealType t = -log2(ldexp(p, 4));
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.7872e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.59822399410385083283727681965013517187e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.30676107401101401386206170152508285083e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.14999782004905950712290914501961213222e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.33941786334132569296061539102765384372e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.67220197146826865151515598496049341734e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.80553562310354708419148501358813792297e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.69365728863553037992854715314245847166e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.66571611322125393586164383361858996769e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.60112572827002965346926427208336150737e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.82368436189138780270310776927920829805e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.53112953085778860983110669544602606343e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.47363651755817041383574210879856850108e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.56959356177318833325064543662295824581e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.16259632533790175212174199386945953139e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.51102540845397821195190063256442894688e-18),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.51733661576324699382035973518172469602e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01435607980568082538278883569729476204e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.93274478117803447229185270863587786287e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.79695020433868416640781960667235896490e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.64179255575983014759473815411232853821e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88411076775875459504324039642698874213e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47924359965537384942568979646011627522e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.80915944234873904741224397674033111178e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.67530059257263142305079790717032648103e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.59778634436027309520742387952911132163e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.45772841855129835242992919296397034883e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.40356170422999016176996652783329671363e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.31127037096552892520323998665757802862e-16),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -16) {
+        RealType t = -log2(ldexp(p, 8));
+
+        // Rational Approximation
+        // Maximum Relative Error: 7.6679e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.67354365380697578246790709817724831418e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.98681625368564496421038758088088788795e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.07310677200866436332040539417232353673e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.24429836496456823308103613923194387860e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.02985399190977938381625172095575017346e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.76782196972948240235456454778537838123e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.74449002785643398012450514191731166637e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.59599894044461264694825875303563328822e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.25326249969126313897827328136779597159e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.41714701672521323699602179629851858792e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.43197925999694667433180053594831915164e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.84385683045691486021670951412023644809e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.34484740544060627138216383389282372695e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.69717198111130468014375331439613690658e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.54464319402577486444841981479085908190e-22),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.56568169258142086426383908572004868200e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.52144703581828715720555168887427064424e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.87268567039210776554113754014224005739e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.04907480436107533324385788289629535047e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.05063218951887755000006493061952858632e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.88707869862323507236241669797692957827e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12876335369047582931728838551780783006e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.96657547014655679104867167083078285517e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.05799908632250375607393338998205481867e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.26543514080500125624753383852230043206e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.02102501711063497529014782040893679505e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.91577723105757841509716090936343311518e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.13824283239122976911704652025193325941e-20),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -32) {
+        RealType t = -log2(ldexp(p, 16));
+
+        // Rational Approximation
+        // Maximum Relative Error: 8.6323e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.92187819510636694694450607724165689649e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.97763858433958798529675258052376253402e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.51315096979196319830162238831654165509e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.63462872759639470195664268077372442947e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.42843455093529447002457295994721102683e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.09839699044798405685726233251577671229e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.42918585783783247934440868680748693033e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.39927851612709063686969934343256912856e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.08811978806833318962489621493456773153e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.05790315329766847040100971840989677130e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.45479905512618918078640786598987515012e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.26725031195260767541308143946590024995e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.93921283405116349017396651678347306610e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.46989755992471397407520449698676945629e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.84098833615882764168840211033822541979e-26),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.76933186134913044021577076301874622292e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.25794095712489484166470336696962749356e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.02367257449622569623375613790000874499e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.72422948147678152291655497515112236849e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.54841786738844966378222670550160421679e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.40690766057741905625149753730480294357e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.57722740290698689456097239435447030950e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12202417878861628530322715231540006386e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.52053248659561670052645118655279630611e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.93156242508301535729374373870786335203e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40767922796889118151219837068812449420e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83472702205100162081157644960354192597e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.07575352729625387198150555665307193572e-24),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -64) {
+        RealType t = -log2(ldexp(p, 32));
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.8799e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.41443550638291131009585191506467028820e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.27887396494095561461365753577189254063e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.12752633002089885479040650194288302309e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.79073355729340068320968150408320521772e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.92104558368762745368896313096467732214e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.31521786147036353766882145733055166296e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.69209485282228501578601478546441260206e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.17440286764020598209076590905417295956e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.24323354132100896221825450145208350291e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.53395893728239001663242998169841168859e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.68737623513761169307963299679882178852e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.34293243065056704017609034428511365032e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.16077944943653158589001897848048630079e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.62195354664281744711336666974567406606e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.34400562353945663460416286570988365992e-30),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.87838171063584994806998206766890809367e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.55395225505766120991458653457272783334e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.45282436421694718640472363162421055686e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29570159526914149727970023744213510609e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.75502365627517415214497786524319814617e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.74140267916464056408693097635546173776e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.53554952415602030474322188152855226456e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.25818606585833092910042975933757268581e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.79945579370700383986587672831350689541e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.64188969985517050219881299229805701044e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.59811800080967790439078895802795103852e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.28225402720788909349967839966304553864e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.22313199865592821923485268860178384308e-28),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -128) {
+        RealType t = -log2(ldexp(p, 64));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.1548e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.23500806363233607692361021471929016922e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.16747938711332885952564885548768072606e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.58252672740225210061031286151136185818e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.15477223656783400505701577048140375949e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.51130817987857130928725357067032472382e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.68955185187558206711296837951129048907e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.30418343536824247801373327173028702308e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.96029044368524575193330776540736319950e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.53259926294848786440686413056632823519e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.09476918795964320022985872737468492126e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.20864569882477440934325776445799604204e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.01321468510611172635388570487951907905e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.45079221107904118975166347269173516170e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.36559452694774774399884349254686988041e-30),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.92396730553947142987611521115040472261e-35),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.36599681872296382199486815169747516110e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.86330544720458446620644055149504593514e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.23793435123936109978741252388806998743e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41867848381419339587560909646887411175e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46493580168298395584601073832432583371e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.03537470107933172406224278121518518287e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.00375884189083338846655326801486182158e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62515041281615938158455493618149216047e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.42337949780187570810208014464208536484e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.40196890950995648233637422892711654146e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.11894256193449973803773962108906527772e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00900961462911160554915139090711911885e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.65825716532665817972751320034032284421e-32),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -256) {
+        RealType t = -log2(ldexp(p, 128));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.3756e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.04845570631944023525776899386112795330e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.18146685146173151383718092529868406030e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.13255476532916847606354932879190731233e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.44228173780288838603949849889291143631e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.18663599891768480607165516401619315227e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.61193813386433438633008774630150180359e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.54402603714659392010463991032389692959e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.76816632514967325885563032378775486543e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.22774672528068516513970610441705738842e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.47312348271366325243169398780745416279e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.10984325972747808970318612951079014854e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.81620524028936785168005732104270722618e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.03443227423068771484783389914203726108e-29),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.02313300749670214384591200940841254958e-34),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.40321396496046206171642334628524367374e-39),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.67292043485384876322219919215413286868e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.61652550158809553935603664087740554258e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14722810796821047167211543031044501921e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.78954660078305461714050086730116257387e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.52794892087101750452585357544956835504e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59652255206657812422503741672829368618e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.84914442937017449248597857220675602148e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.23662183613814475007146598734598810102e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.55388618781592901470236982277678753407e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.20418178834057564300014964843066904024e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48606639104883413456676877330419513129e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.39845313960416564778273486179935754019e-31),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14538443937605324316706211070799970095e-35),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -512) {
+        RealType t = -log2(ldexp(p, 256));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.6639e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.32865827226175697711590794217590458484e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.27551166309670994513910580518431041518e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.57161482253140058637495100797888501265e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.26908727392152312216118985392395130974e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.31391169101865809627389212651592902649e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.58926174475498352884244229017384309804e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.97074898765380614681225071978849430802e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.59852103341093122669197704225736036199e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.97287954178083606552531325613580819555e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.57634773176875526612407357244997035312e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.34467233210713881817055138794482883359e-25),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.91236170873875898506577053309622472122e-29),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.12220990075567880730037575497818287435e-33),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.20881889651487527801970182542596258873e-37),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.56848486878078288956741060120464349537e-43),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.33267370502089423930888060969568705647e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.39632352029900752622967578086289898150e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.42703739831525305516280300008439396218e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.45767617531685380458878368024246654652e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40344962756110545138002101382437142038e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.47015258371290492450093115369080460499e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.97280918936580227687603348219414768787e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.40441024286579579491205384492088325576e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26130000914236204012152918399995098882e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.03893990417552709151955156348527062863e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.12687002255767114781771099969545907763e-31),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.74218645918961186861014420578277888513e-35),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36897549168687040570349702061165281706e-39),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -1024) {
+        RealType t = -log2(ldexp(p, 512));
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.0360e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.67937186583822375017526293948703697225e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.06681764321003187068904973985967908140e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.12508887240195683379004033347903251977e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.56847823157999127998977939588643284176e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.31281869105767454049413701029676766275e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.01498951390503036399081209706853095793e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.72866867304007090391517734634589972858e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.44819806993104486828983054294866921869e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.91441365806306460165885645136864045231e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.71364347097027340365042558634044496149e-25),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.63601648491144929836375956218857970640e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.76948956673676441236280803645598939735e-32),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.47367651137203634311843318915161504046e-37),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.83187517541957887917067558455828915184e-41),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.21480186561579326423946788448005430367e-47),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16494591376838053609854716130343599036e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.97651667629616309497454026431358820357e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.77741398674456235952879526959641925087e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.39478109667902532743651043316724748827e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.35965905056304225411108295866332882930e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.83206524996481183422082802793852630990e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30320324476590103123012385840054658401e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.30322948189714718819437477682869360798e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.43729790298535728717477691270336818161e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.89788697114251966298674871919685298106e-30),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.43510901856942238937717065880365530871e-34),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.38232043389918216652459244727737381677e-38),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.64599465785019268214108345671361994702e-43),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -2048) {
+        RealType t = -log2(ldexp(p, 1024));
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.5130e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.11959316095291435774375635827672517008e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.30292575371366023255165927527306483022e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.54260157439166096303943109715675142318e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.61232819199170639867079290977704351939e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.74481972848503486840161528924694379655e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.98283577906243441829434029827766571263e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.90691715740583828769850056130458574520e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.31354728925505346732804698899977180508e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.42545288372836698650371589645832759416e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.12901629676328680681102537492164204387e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.99690040253176100731314099573187027309e-31),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.07899506956133955785140496937520311210e-35),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.88607240095256436460507438213387199067e-40),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.78815984551154095621830792130401294111e-45),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.20516030880916148179297554212609531432e-51),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.81973541224235020744673910266545976833e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.49156990107280109344880219729275939242e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21606730563542176411852745162267260946e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11449329529433741944366607648360521674e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35659138507940819452801802756409587220e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.95708516718550485872934856595725983907e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78871290972777009292563576533612002908e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.60955318960542258732596447917271198307e-25),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72437581728117963125690670426402194936e-29),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.77473938855958487119889840032590783232e-33),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66198494713809467076392278745811981500e-37),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.34142897042941614778352280692901008538e-42),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.98791658647635156162347063765388728959e-47),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -4096) {
+        RealType t = -log2(ldexp(p, 2048));
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.1220e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.67307564006689676593687414536012112755e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.21005690250741024744367516466433711478e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.11537345869365655126739041291119096558e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.82910297156061391001507891422501792453e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.51574933516708249049824513935386420692e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.56433757960363802088718489136097249753e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.74248235865301086829849817739500215149e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.19151506367084295119369434315371762091e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.98518959000360170320183116510466814569e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.21330422702314763225472001861559380186e-30),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.44346922987964428874014866468161821471e-34),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.59773305152191273570416120169527607421e-39),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.81894412321532723356954669501665983316e-44),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.25766346856943928908756472385992861288e-49),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.44601448994095786447982489957909713982e-55),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.90814053669730896497462224007523900520e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.72450689508305973756255440356759005330e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.76517273086984384225845151573287252506e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31844251885317815627707511621078762352e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.22689324865113257769413663010725436393e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.27524826460469866934006001123700331335e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39172850948322201614266822896191911031e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40343613937272428197414545004329993769e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.17936773028976355507339458927541970545e-32),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.66512598049860260933817550698863263184e-36),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.06432209670481882442649684139775366719e-41),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.10247886333916820534393624270217678968e-46),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.40899321122058714028548211810431871877e-51),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -8192) {
+        RealType t = -log2(ldexp(p, 4096));
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.8974e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.36960987939726803544369406181770745475e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.17239311695985070524235502979761682692e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.51192208811996535244435318068035492922e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.38938553034896173195617671475670860841e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.55156910900732478717648524688116855303e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.14905040433940475292279950923000597280e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.35237464492052939771487320880614968639e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.07937000518607459141766382199896010484e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.42797358100745086706362563988598447929e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.27871637324128856529004325499921407260e-32),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.99553669724530906250814559570819049401e-37),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.04272644552406682186928100080598582627e-42),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.70093315570856725077212325128817808000e-47),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.51767319872105260145583037426067406953e-53),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.87159268409640967747617639113346310759e-59),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45350026842128595165328480395513258721e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.30398532102631290226106936127181928207e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.45242264812189519858570105609209495630e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.22745462510042159972414219082495434039e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31834645038348794443252730265421155969e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44590392528760847619123404904356730177e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08436623062305193311891193246627599030e-25),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.46542554766048266351202730449796918707e-30),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78672067064478133389628198943161640913e-34),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.56573050761582685018467077197376031818e-39),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.92145137276530136848088270840255715047e-44),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96933863894082533505471662180541379922e-49),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.92661972206232945959915223259585457082e-55),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -16384) {
+        RealType t = -log2(ldexp(p, 8192));
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.8819e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.24662793339079714510108682543625432532e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.25841960642102016210295419419373971750e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.10589156998251704634852108689102850747e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.18697614924486382142056819421294206504e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.79445222262445726654186491785652765635e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.41847338407338901513049755299049551186e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.44550500540299259432401029904726959214e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.97463434518480676079167684683604645092e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.68404349202062958045327516688040625516e-30),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.14018837476359778654965300153810397742e-35),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.67726222606571327724434861967972555751e-40),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.48082398191886705229604237754446294033e-45),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.28534456209055262678153908192583037946e-51),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.99466700145428173772768099494881455874e-57),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.43473066278196981345209422626769148425e-63),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.26570199429958856038191879713341034013e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.32484776300757286079244074394356908390e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.31234182027812869096733088981702059020e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.13711443044675425837293030288097468867e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.11480036828082409994688474687120865023e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.25592803132287127389756949487347562847e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.45726006695535760451195102271978072855e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13082170504731110487003517418453709982e-32),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.48217827031663836930337143509338210426e-37),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.47389053144555736191304002865419453269e-42),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.90980968229123572201281013063229644814e-47),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.79448544326289688123648457587797649323e-53),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.56179793347045575604935927245529360950e-59),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        result = -boost::math::numeric_limits<RealType>::infinity();
+    }
+
+    return result;
+}
+
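+// Editor's note, not part of the original patch: tools::evaluate_polynomial
+// evaluates these arrays by Horner's rule with the constant term first, so a
+// hand-rolled equivalent (illustration only) would be:
+//
+//     #include <cstddef>
+//     template <typename Real, std::size_t N>
+//     Real horner(const Real (&c)[N], Real t) {
+//         Real s = c[N - 1];                   // highest-order coefficient
+//         for (std::size_t i = N - 1; i > 0; --i) { s = s * t + c[i - 1]; }
+//         return s;
+//     }
+//
+// making each branch result = horner(P, t) / horner(Q, t).
+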
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_quantile_upper_imp_prec(const RealType& p, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (p >= 0.25) {
+        RealType t = p - static_cast<RealType>(0.25);
+
+        // Rational Approximation
+        // Maximum Absolute Error: 1.8559e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(4.81512108276093785320e-1),
+            static_cast<RealType>(-2.74296316128959647914e0),
+            static_cast<RealType>(-3.29973875964825685757e1),
+            static_cast<RealType>(-4.87536980816224603581e1),
+            static_cast<RealType>(8.22233203036734027999e1),
+            static_cast<RealType>(1.21654607908452130093e2),
+            static_cast<RealType>(-6.66681853240657307279e1),
+            static_cast<RealType>(-4.28101952511581488588e1),
+        };
+        BOOST_MATH_STATIC const RealType Q[10] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(8.20189490825315245036e0),
+            static_cast<RealType>(1.63469912146101848441e1),
+            static_cast<RealType>(-1.52740920318273920072e1),
+            static_cast<RealType>(-5.41684560257839409762e1),
+            static_cast<RealType>(6.51733677169299416471e0),
+            static_cast<RealType>(3.93092001388102589237e1),
+            static_cast<RealType>(-9.59983666140749481195e-1),
+            static_cast<RealType>(-9.95648827557655863699e-1),
+            static_cast<RealType>(-1.32007124426778083829e0),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.125) {
+        RealType t = p - static_cast<RealType>(0.125);
+
+        // Rational Approximation
+        // Maximum Absolute Error: 4.6019e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(1.70276979914029733585e0),
+            static_cast<RealType>(2.09991992116646276165e1),
+            static_cast<RealType>(2.26775403775298867998e1),
+            static_cast<RealType>(-4.85384304722129472833e2),
+            static_cast<RealType>(-1.47107146466495573999e3),
+            static_cast<RealType>(-7.08748473959943943929e1),
+            static_cast<RealType>(1.54245210917147215257e3),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(2.13092357122115486375e1),
+            static_cast<RealType>(1.57318281834689144053e2),
+            static_cast<RealType>(4.42261730187813035957e2),
+            static_cast<RealType>(2.10814431586717588454e2),
+            static_cast<RealType>(-6.36700983439599552504e2),
+            static_cast<RealType>(-2.82923881266630617596e2),
+            static_cast<RealType>(1.36613971025062750340e2),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -4) {
+        RealType t = -log2(ldexp(p, 3));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.2193e-19
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(4.25692449785074345588e-1),
+            static_cast<RealType>(3.10963501706596356267e-1),
+            static_cast<RealType>(2.91357806215297069863e-2),
+            static_cast<RealType>(2.34716342676849303244e-2),
+            static_cast<RealType>(5.83137296293361915583e-3),
+            static_cast<RealType>(3.71792415497884868748e-4),
+            static_cast<RealType>(1.59538372221030642757e-4),
+            static_cast<RealType>(4.74040834029330213692e-6),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(4.14801234415100707213e-1),
+            static_cast<RealType>(1.04693730144480856638e-1),
+            static_cast<RealType>(3.81581484862997435076e-2),
+            static_cast<RealType>(8.95334009127358617362e-3),
+            static_cast<RealType>(1.43316686981760147226e-3),
+            static_cast<RealType>(1.81367766024620080990e-4),
+            static_cast<RealType>(1.54779999748286671973e-5),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p));
+    }
+    else if (ilogb(p) >= -8) {
+        RealType t = -log2(ldexp(p, 4));
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.4418e-17
+        BOOST_MATH_STATIC const RealType P[11] = {
+            static_cast<RealType>(5.07341098045260541890e-1),
+            static_cast<RealType>(3.11771145411143166935e-1),
+            static_cast<RealType>(1.74515601081894060888e-1),
+            static_cast<RealType>(8.46576990174024231338e-2),
+            static_cast<RealType>(2.57510090204322149315e-2),
+            static_cast<RealType>(8.26605326867021684811e-3),
+            static_cast<RealType>(1.73081423934722046819e-3),
+            static_cast<RealType>(3.36314161099011673569e-4),
+            static_cast<RealType>(4.50990441180388912803e-5),
+            static_cast<RealType>(4.53513191985642134268e-6),
+            static_cast<RealType>(2.62304611053075404923e-7),
+        };
+        BOOST_MATH_STATIC const RealType Q[11] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(5.28225379952156944029e-1),
+            static_cast<RealType>(3.49662079845715371907e-1),
+            static_cast<RealType>(1.45408903426879603625e-1),
+            static_cast<RealType>(5.06773501409016231879e-2),
+            static_cast<RealType>(1.45385556714043243731e-2),
+            static_cast<RealType>(3.31235831325018043744e-3),
+            static_cast<RealType>(6.06977554525543056050e-4),
+            static_cast<RealType>(8.42406730405209749492e-5),
+            static_cast<RealType>(8.32337989541696717905e-6),
+            static_cast<RealType>(4.84923196546857128337e-7),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p));
+    }
+    else if (ilogb(p) >= -16) {
+        RealType t = -log2(ldexp(p, 8));
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.7932e-17
+        BOOST_MATH_STATIC const RealType P[10] = {
+            static_cast<RealType>(5.41774626094491510395e-1),
+            static_cast<RealType>(4.11060141334529017898e-1),
+            static_cast<RealType>(1.48195601801946264526e-1),
+            static_cast<RealType>(3.33881552814492855873e-2),
+            static_cast<RealType>(5.20893974732203890418e-3),
+            static_cast<RealType>(5.84734765774178832854e-4),
+            static_cast<RealType>(4.71028150898133935445e-5),
+            static_cast<RealType>(2.59185739450631464618e-6),
+            static_cast<RealType>(7.77428184258777394627e-8),
+            static_cast<RealType>(2.51255632629650930196e-14),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(7.58341767924960527280e-1),
+            static_cast<RealType>(2.73511775500642961539e-1),
+            static_cast<RealType>(6.16011987856129890130e-2),
+            static_cast<RealType>(9.61296002312356116021e-3),
+            static_cast<RealType>(1.07890675777726076554e-3),
+            static_cast<RealType>(8.69223632953458271977e-5),
+            static_cast<RealType>(4.78248875031756169279e-6),
+            static_cast<RealType>(1.43460852065144859304e-7),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p));
+    }
+    else if (ilogb(p) >= -32) {
+        RealType t = -log2(ldexp(p, 16));
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.0396e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(5.41926067826974905066e-1),
+            static_cast<RealType>(4.86926556246548518715e-1),
+            static_cast<RealType>(2.11963908288176005856e-1),
+            static_cast<RealType>(5.92200639925655576883e-2),
+            static_cast<RealType>(1.18859816815542567438e-2),
+            static_cast<RealType>(1.76833662992855443754e-3),
+            static_cast<RealType>(2.21226152157950219596e-4),
+            static_cast<RealType>(1.50444847316426133872e-5),
+            static_cast<RealType>(1.87458213915373906356e-6),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(8.98511036742503939380e-1),
+            static_cast<RealType>(3.91130673008184655152e-1),
+            static_cast<RealType>(1.09277016228474605069e-1),
+            static_cast<RealType>(2.19328471889880028208e-2),
+            static_cast<RealType>(3.26305879571349016107e-3),
+            static_cast<RealType>(4.08222014684743492069e-4),
+            static_cast<RealType>(2.77611385768697969181e-5),
+            static_cast<RealType>(3.45911046256304795257e-6),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p));
+    }
+    else {
+        RealType p_square = p * p;
+
+        if ((boost::math::isnormal)(p_square)) {
+            result = 1 / cbrt(p_square * constants::two_pi<RealType>());
+        }
+        else if (p > 0) {
+            result = 1 / (cbrt(p) * cbrt(p) * cbrt(constants::two_pi<RealType>()));
+        }
+        else {
+            result = boost::math::numeric_limits<RealType>::infinity();
+        }
+    }
+
+    return result;
+}
+
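+// Editor's note, not part of the original patch: the closing branch above uses
+// the closed form x = (2*pi*p^2)^(-1/3), i.e. the leading-order inversion of a
+// survival function decaying like x^(-3/2) / sqrt(2*pi); when p*p would
+// underflow to a subnormal, the cube root is instead taken factor by factor so
+// the result remains finite.
+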
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_quantile_upper_imp_prec(const RealType& p, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (p >= 0.375) {
+        RealType t = p - static_cast<RealType>(0.375);
+
+        // Rational Approximation
+        // Maximum Absolute Error: 4.0835e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.00474815142578902619056852805926666121e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.56422290947427848191079775267512708223e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.70103710180837859003070678080056933649e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.08521918131449191445864593768320217287e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29340655781369686013042530147130581054e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.24198237124638368989049118891909723118e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.43382878809828906953609389440800537385e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.45564809127564867825118566276365267035e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.75881247317499884393790698530115428373e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.55845932095942777602241134226597158364e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.41328261385867825781522154621962338450e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06758225510372847658316203115073730186e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.10895417312529385966062255102265009972e0),
+        };
+        BOOST_MATH_STATIC const RealType Q[12] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.88252553879196710256650370298744093367e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.54875259600848880869571364891152935969e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.78589587338618424770295921221996471887e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.15356831947775532414727361010652423453e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12951532118504570745988981200579372124e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.48163841544376327168780999614703092433e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.56786609618056303930232548304847911521e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.25610739352108840474197350343978451729e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.27063786175330237448255839666252978603e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.11941093895004369510720986032269722254e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.51487618026728514833542002963603231101e1),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.25) {
+        RealType t = p - static_cast<RealType>(0.25);
+
+        // Rational Approximation
+        // Maximum Absolute Error: 5.7633e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.81512108276093787175849069715334402323e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24417080443497141096829831516758083481e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.67006165991083501886186268944009973084e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.74402382755828993223083868408545308340e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.49182541725192134610277727922493871787e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.67273564707254788337557775618297381267e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.73476432616329813096120568871900178919e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31235376166262024838125198332476698090e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.59379285677781413393733801325840617522e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.38151434050794836595564739176884302539e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.33534676810383673962443893459127818078e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.38110822236764293910895765875742805411e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.42750073722992463087082849671338957023e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.54255748148299874514839812717054396793e2),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.64823387375875361292425741663822893626e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02973633484731117050245517938177308809e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.71288209768693917630236009171518272534e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.23837527610546426062625864735895938014e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.05056816585729983223036277071927165555e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.48087477651935811184913947280572029967e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04631058325147527913398256133791276127e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.69813394441679590721342220435891453447e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.92323371456465893290687995174952942311e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.68542430563281320943284015587559056621e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.17969051793607842221356465819951568080e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.82773308760283383020168853159163391394e2),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.125) {
+        RealType t = p - static_cast<RealType>(0.125);
+
+        // Rational Approximation
+        // Maximum Absolute Error: 2.1140e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70276979914029738186601698003670175907e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.63126762626382548478172664328434577553e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04190225271045202674546813475341133174e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.93523974140998850492859698545966806498e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19814006186501010136822066747124777014e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.55931423620290859807616748030589502039e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.78874021192395317496507459296221703565e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.03860533237347587977439662522389465152e6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.77882648875352690605815508748162607271e6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.05498612167816258406694194925933958145e6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05326361485692298778330190198630232666e7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.85827791876754731187453265804790139032e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.93719378006868242377955041137674308589e6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.56839957539576784391036362196229047625e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.95604329277359828898502487252547842378e6),
+        };
+        BOOST_MATH_STATIC const RealType Q[16] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.79208640567193066236912382037923299779e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.94775812217734059201656828286490832145e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16467934643564936346029555887148320030e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.35525720248600096849901920839060920346e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.69760913594243328874861534307039589127e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.32330501005950982838953061458838040612e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.79610639577090112327353399739315606205e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.43314292923292828425630915931385776182e6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.97538885038058371436244702169375622661e6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.48431896958634429210349441846613832681e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.93459449030820736960297236799012798749e6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.67200014823529787381847745962773726408e6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.37035571075060153491151970623824940994e6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.22682822001329636071591164177026394518e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.09781406768816062486819491582960840983e4),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -4) {
+        RealType t = -log2(ldexp(p, 3));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.1409e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.25692449785074345466504245009175450649e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75679137667345136118441108839649360362e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06171803174020856964914440692439080669e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.87798066278592051163038122952593080648e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20070543183347459409303407166630392077e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.13457391270614708627745403376469848816e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06743974464224003715510181633693539914e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16870984737226212814217822779976770316e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21845093091651861426944931268861694026e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.85357146081877929591916782097540632519e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.19085800299127898508052519062782284785e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41985644250494046067095909812634573318e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30857042700765443668305406695750760693e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.10466412567107519640190849286913680449e-10),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31914248618040435028023418981527961171e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.73578090645412656850163531828709850171e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.57329813782272411333511950903192234311e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62736127875896578315177123764520823372e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.76809643836078823237530990091078867553e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32026948719622983920194944841520771986e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.45051018027743807545734050620973716634e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.58281707210621813556068724127478674938e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.63884527227517358294732620995363921547e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15973602356223075515067915930205826229e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35069439950884795002182517078104942615e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15454119109586223908613596754794988609e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.55273685376557721039847456564342945576e-10),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p));
+    }
+    else if (ilogb(p) >= -8) {
+        RealType t = -log2(ldexp(p, 4));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.2521e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[23] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.07341098045260497471001948654506267614e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16518383383878659278973043343250842753e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.10029094208424121908983949243560936013e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.04771840726172284780129819470963100749e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.34173170868011689830672637082451998700e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41990262178664512140746911398264330173e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.06779488545758366708787010705581103705e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.41892665233583725631482443019441608726e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.20692306716979208762785454648538891867e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.11097906809673639231336894729060830995e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.37476591232600886363441107536706973169e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.02659053066396720145189153810309784416e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.02209877191642023279303996697953314344e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.56663781532392665205516573323950583901e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.95655734237060800145227277584749429063e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.06357695252098035545383649954315685077e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.78759045059235560356343893064681290047e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.95881339136963512103591745337914059651e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.70156441275519927563064848389865812060e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.99745225746277063516394774908346367811e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.45718440382347867317547921045052714102e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.39027665085346558512961348663034579801e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05739751797738770096482688062542436470e-15),
+        };
+        BOOST_MATH_STATIC const RealType Q[23] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43370372582239919321785765900615222895e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.52872159582703775260145036441128318159e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28243735290178057451806192890274584778e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.93375398009812888642212045868197435998e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.73364866677217419593129631900708646445e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.53645928499107852437053167521160449434e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.74280939589407863107682593092148442428e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.80095449855178765594835180574448729793e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.65924845456946706158946250220103271334e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52170861715436344002253767944763106994e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.87246437551620484806338690322735878649e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.88631873230311653853089809596759382095e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.84478812152918182782333415475103623486e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.47998768403859674841488325856607782853e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.82364683269852480160620586102339743788e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.65854316058742127585142691993199177898e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11358340340462071552670838135645042498e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22818744671190957896035448856159685984e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11038729491846772238262374112315536796e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.21355801166652957655438257794658921155e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.41278271853370874105923461404291742454e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.95100373579692323015092323646110838623e-15),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p));
+    }
+    else if (ilogb(p) >= -16) {
+        RealType t = -log2(ldexp(p, 8));
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.0703e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[21] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.41774626094491452462664949805613444094e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.96383089261273022706449773421031102175e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.16315295073029174376617863024082371446e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.65377894193914426949840018839915119410e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33210993830236821503160637845009556016e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.69315463529653886947182738378630780083e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.09869947341518160436616160018702590834e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44331691052908906654005398143769791881e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13131413925652085071882765653750661678e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.64441840437413591336927030249538399459e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78393581596372725434038621824715039765e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.50239319821178575427758224587858938204e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92619647697287767235953207451871137149e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26901081456833267780600560830367533351e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.12151768312254597726918329997945574766e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.36907507996686107513673694597817437197e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31699373909892506279113260845246144240e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.11230682511893290562864133995544214588e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.44627067257461788044784631155226503036e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39869585157420474301450400944478312794e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.82128612844034824876694595066123093042e-27),
+        };
+        BOOST_MATH_STATIC const RealType Q[20] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.65414405277042133067228113526697909557e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32179221250476209346757936207079534440e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.74217392682100275524983756207618144313e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.45810448055940046896534973720645113799e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.81487408603233765436807980794697048675e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.49442843848941402948883852684502731460e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.66330842256792791665907478718489013963e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.93285292223845804061941359223505045576e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.87966347754794288681626114849829710697e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.13711644429711675111080150193733607164e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.61758862007482013187806625777101452737e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.55435556106272558989915248980090731639e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.34166571320580242213843747025082914011e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31411360099525131959755145015018410429e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.21684839228785650625270026640716752452e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.43021096301255274530428188746599779008e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.58831290247776456235908211620983180005e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74309280855806399632683315923592902203e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.58097100528573186098159133443927182780e-18),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p));
+    }
+    else if (ilogb(p) >= -32) {
+        RealType t = -log2(ldexp(p, 16));
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.4124e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[19] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.41926067826974814669251179264786585885e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.21141529920003643675474888047093566280e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59964592861304582755436075901659426485e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.95135112971576806260593571877646426022e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.12322024725362032809787183337883163254e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.96758465518847580191799508363466893068e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12389553946694902774213055563291192175e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04599236076217479033545023949602272721e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.16143771174487665823565565218797804931e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.38966874413947625866830582082846088427e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.02590325514935982607907975481732376204e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44376747400143802055827426602151525955e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.82088624006657184426589019067893704020e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.95757210706845964048697237729100056232e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.36096213291559182424937062842308387702e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14362780521873256616533770657488533993e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.73571098395815275003552523759665474105e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47286214854389274681661944885238913581e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.73701196181204039400706651811524874455e-34),
+        };
+        BOOST_MATH_STATIC const RealType Q[18] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.77119890916406072259446489508263892540e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.95177888809731859578167185583119074026e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.29131027214559081111011582466619105016e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31442657037887347262737789825299661237e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83928863984637222329515960387531101267e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.07389089078167127136964851949662391744e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.93013847797006474150589676891548600820e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50600573851533884594030683413819219915e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.94539484213971921794449107859541806317e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.58360895032645281635534287874266252341e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.66414102108217999886628042310332365446e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07411076181287950822375436854492998754e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.61224937285582228022463072515935601355e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.89242339209389981530783624934733098598e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11030225010379194015550512905872992373e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.20285566539355859922818448335043495666e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.71782855576364068752705740544460766362e-20),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p));
+    }
+    else if (ilogb(p) >= -64) {
+        RealType t = -log2(ldexp(p, 32));
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.1680e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[18] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.41926070139289008206183757488364846894e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.78434820569480998586988738136492447574e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.07939171933509333571821660328723436210e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.92438439347811482522082798370060349739e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24288918322433485413615362874371441367e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.04437759300344740815274986587186340509e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74063952231188399929705762263485071234e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.07228849610363181194047955109059900544e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.93120850707001212714821992328252707694e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.40911049607879914351205073608184243057e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71898232013947717725198847649536278438e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06963706982203753050300400912657068823e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.79849166632277658631839126599110199710e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74682085785152276503345630444792840850e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09650236336641219916377836114077389212e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.97326394822836529817663710792553753811e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.26635728806398747570910072594323836441e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.96470010392255781222480229189380065951e-18),
+        };
+        BOOST_MATH_STATIC const RealType Q[18] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.82841492468725267177870050157374330523e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.83703946702662950408034486958999188355e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09320896703777230915306208582393356690e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29346630787642344947323515884281464979e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.77242894492599243245354774839232776944e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.05722029871614922850936250945431594997e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.66920224988248720006255827987385374411e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.40887155754772190509572243444386095560e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.44545968319921473942351968892623238920e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.17198676140022989760684932594389017027e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.97376935482567419865730773801543995320e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06997835790265899882151030367297786861e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.06862653266619706928282319356971834957e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.02334307903766790059473763725329176667e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.33174535634931487079630169746402085699e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70989324903345102377898775620363767855e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.47067260145014475572799216996976703615e-18),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p));
+    }
+    else {
+        RealType p_square = p * p;
+
+        if ((boost::math::isnormal)(p_square)) {
+            result = 1 / cbrt(p_square * constants::two_pi<RealType>());
+        }
+        else if (p > 0) {
+            result = 1 / (cbrt(p) * cbrt(p) * cbrt(constants::two_pi<RealType>()));
+        }
+        else {
+            result = boost::math::numeric_limits<RealType>::infinity();
+        }
+    }
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_quantile_imp_prec(const RealType& p, bool complement, const boost::math::integral_constant<int, 53>& tag)
+{
+    if (p > 0.5) {
+        return !complement ? mapairy_quantile_upper_imp_prec(1 - p, tag) : mapairy_quantile_lower_imp_prec(1 - p, tag);
+    }
+
+    return complement ? mapairy_quantile_upper_imp_prec(p, tag) : mapairy_quantile_lower_imp_prec(p, tag);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_quantile_imp_prec(const RealType& p, bool complement, const boost::math::integral_constant<int, 113>& tag)
+{
+    if (p > 0.5) {
+        return !complement ? mapairy_quantile_upper_imp_prec(1 - p, tag) : mapairy_quantile_lower_imp_prec(1 - p, tag);
+    }
+
+    return complement ? mapairy_quantile_upper_imp_prec(p, tag) : mapairy_quantile_lower_imp_prec(p, tag);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_quantile_imp(const mapairy_distribution<RealType, Policy>& dist, const RealType& p, bool complement)
+{
+    // This routine implements the quantile for the Map-Airy distribution;
+    // the value p may be the probability, or its complement if complement=true.
+
+    constexpr auto function = "boost::math::quantile(mapairy<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+    RealType location = dist.location();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_probability(function, p, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Map-Airy distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    result = location + scale * mapairy_quantile_imp_prec(p, complement, tag_type());
+
+    return result;
+}
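// ---------------------------------------------------------------------------
// Editorial illustration (not part of the patch): the dispatcher above folds
// p > 0.5 into the opposite tail, so the rational approximations only ever
// see arguments in (0, 0.5].  Through the public interface defined later in
// this header, that symmetry means quantile(d, p) and
// quantile(complement(d, 1 - p)) must agree:
#include <boost/math/distributions/mapairy.hpp>
#include <cstdio>

int main()
{
    boost::math::mapairy d(0.0, 1.0); // location 0, scale 1
    const double p = 0.9;
    double q1 = boost::math::quantile(d, p);
    double q2 = boost::math::quantile(boost::math::complement(d, 1 - p));
    std::printf("%.17g\n%.17g\n", q1, q2); // equal up to rounding of 1 - p
}
// ---------------------------------------------------------------------------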
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_mode_imp_prec(const boost::math::integral_constant<int, 53>&)
+{
+    return static_cast<RealType>(-1.16158727113597068525);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_mode_imp_prec(const boost::math::integral_constant<int, 113>&)
+{
+    return BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.1615872711359706852500000803029112987);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_mode_imp(const mapairy_distribution<RealType, Policy>& dist)
+{
+    // This implements the mode for the Map-Airy distribution.
+
+    constexpr auto function = "boost::math::mode(mapairy<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+    RealType location = dist.location();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Map-Airy distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    result = location + scale * mapairy_mode_imp_prec<RealType>(tag_type());
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_median_imp_prec(const boost::math::integral_constant<int, 53>&)
+{
+    return static_cast<RealType>(-0.71671068545502205332);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_median_imp_prec(const boost::math::integral_constant<int, 113>&)
+{
+    return BOOST_MATH_BIG_CONSTANT(RealType, 113, -0.71671068545502205331700196278067230944440);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_median_imp(const mapairy_distribution<RealType, Policy>& dist)
+{
+    // This implements the median for the Map-Airy distribution.
+
+    constexpr auto function = "boost::math::median(mapairy<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+    RealType location = dist.location();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Map-Airy distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    result = location + scale * mapairy_median_imp_prec<RealType>(tag_type());
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_entropy_imp_prec(const boost::math::integral_constant<int, 53>&)
+{
+    return static_cast<RealType>(2.00727681841065634600);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_entropy_imp_prec(const boost::math::integral_constant<int, 113>&)
+{
+    return BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.0072768184106563460003025875575283708);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_entropy_imp(const mapairy_distribution<RealType, Policy>& dist)
+{
+    // This implements the entropy for the Map-Airy distribution.
+
+    constexpr auto function = "boost::math::entropy(mapairy<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Map-Airy distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    result = mapairy_entropy_imp_prec<RealType>(tag_type()) + log(scale);
+
+    return result;
+}
+
+} // detail
+
+template <class RealType = double, class Policy = policies::policy<> >
+class mapairy_distribution
+{
+    public:
+    typedef RealType value_type;
+    typedef Policy policy_type;
+
+    BOOST_MATH_GPU_ENABLED mapairy_distribution(RealType l_location = 0, RealType l_scale = 1)
+        : mu(l_location), c(l_scale)
+    {
+        constexpr auto function = "boost::math::mapairy_distribution<%1%>::mapairy_distribution";
+        RealType result = 0;
+        detail::check_location(function, l_location, &result, Policy());
+        detail::check_scale(function, l_scale, &result, Policy());
+    } // mapairy_distribution
+
+    BOOST_MATH_GPU_ENABLED RealType location()const
+    {
+        return mu;
+    }
+    BOOST_MATH_GPU_ENABLED RealType scale()const
+    {
+        return c;
+    }
+
+    private:
+    RealType mu;  // The location parameter.
+    RealType c;   // The scale parameter.
+};
+
+typedef mapairy_distribution<double> mapairy;
+
+#ifdef __cpp_deduction_guides
+template <class RealType>
+mapairy_distribution(RealType) -> mapairy_distribution<typename boost::math::tools::promote_args<RealType>::type>;
+template <class RealType>
+mapairy_distribution(RealType, RealType) -> mapairy_distribution<typename boost::math::tools::promote_args<RealType>::type>;
+#endif
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const mapairy_distribution<RealType, Policy>&)
+{ // Range of permissible values for random variable x.
+    BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
+    {
+        return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
+    }
+    else
+    { // Can only use max_value.
+        using boost::math::tools::max_value;
+        return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max.
+    }
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const mapairy_distribution<RealType, Policy>&)
+{ // Range of supported values for random variable x.
+    // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
+    BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
+    {
+        return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
+    }
+    else
+    { // Can only use max_value.
+        using boost::math::tools::max_value;
+        return boost::math::pair<RealType, RealType>(-tools::max_value<RealType>(), max_value<RealType>()); // - to + max.
+    }
+}
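// ---------------------------------------------------------------------------
// Editorial illustration (not part of the patch): constructing the
// distribution and querying its support.  With the C++17 deduction guides
// above, the RealType template argument is inferred from the constructor
// arguments.
#include <boost/math/distributions/mapairy.hpp>
#include <cstdio>

int main()
{
    boost::math::mapairy_distribution dist(1.0, 2.0); // deduced as <double>
    auto s = boost::math::support(dist);
    std::printf("support: (%g, %g)\n", s.first, s.second); // (-inf, +inf)
}
// ---------------------------------------------------------------------------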
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const mapairy_distribution<RealType, Policy>& dist, const RealType& x)
+{
+    return detail::mapairy_pdf_imp(dist, x);
+} // pdf
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const mapairy_distribution<RealType, Policy>& dist, const RealType& x)
+{
+    return detail::mapairy_cdf_imp(dist, x, false);
+} // cdf
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const mapairy_distribution<RealType, Policy>& dist, const RealType& p)
+{
+    return detail::mapairy_quantile_imp(dist, p, false);
+} // quantile
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<mapairy_distribution<RealType, Policy>, RealType>& c)
+{
+    return detail::mapairy_cdf_imp(c.dist, c.param, true);
+} // cdf complement
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<mapairy_distribution<RealType, Policy>, RealType>& c)
+{
+    return detail::mapairy_quantile_imp(c.dist, c.param, true);
+} // quantile complement
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mean(const mapairy_distribution<RealType, Policy>& dist)
+{
+    return dist.location();
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType variance(const mapairy_distribution<RealType, Policy>& /*dist*/)
+{
+    return boost::math::numeric_limits<RealType>::infinity();
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mode(const mapairy_distribution<RealType, Policy>& dist)
+{
+    return detail::mapairy_mode_imp(dist);
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType median(const mapairy_distribution<RealType, Policy>& dist)
+{
+    return detail::mapairy_median_imp(dist);
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const mapairy_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no skewness:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The Map-Airy Distribution has no skewness");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::skewness(mapairy<%1%>&)",
+        "The Map-Airy distribution does not have a skewness: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy()); // infinity?
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const mapairy_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no kurtosis:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The Map-Airy Distribution has no kurtosis");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::kurtosis(mapairy<%1%>&)",
+        "The Map-Airy distribution does not have a kurtosis: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const mapairy_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no kurtosis excess:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The Map-Airy Distribution has no kurtosis excess");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::kurtosis_excess(mapairy<%1%>&)",
+        "The Map-Airy distribution does not have a kurtosis excess: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType entropy(const mapairy_distribution<RealType, Policy>& dist)
+{
+    return detail::mapairy_entropy_imp(dist);
+}
+
+}} // namespaces
+
+
+#endif // BOOST_STATS_MAPAIRY_HPP
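// ---------------------------------------------------------------------------
// Editorial illustration (not part of the patch): a round trip through the
// non-member accessors this header just defined.  cdf and quantile are mutual
// inverses by construction; no numeric outputs are asserted here.
#include <boost/math/distributions/mapairy.hpp>
#include <cstdio>

int main()
{
    boost::math::mapairy d; // standard distribution: location 0, scale 1
    const double x = 0.5;
    double p = boost::math::cdf(d, x);
    std::printf("pdf(%g) = %g\n", x, boost::math::pdf(d, x));
    std::printf("cdf(%g) = %g\n", x, p);
    std::printf("quantile(%g) = %g (recovers x)\n", p, boost::math::quantile(d, p));
    std::printf("median = %g, mode = %g, entropy = %g\n",
                boost::math::median(d), boost::math::mode(d), boost::math::entropy(d));
}
// ---------------------------------------------------------------------------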
diff --git a/include/boost/math/distributions/negative_binomial.hpp b/include/boost/math/distributions/negative_binomial.hpp
index 18eec09939..f520c94803 100644
--- a/include/boost/math/distributions/negative_binomial.hpp
+++ b/include/boost/math/distributions/negative_binomial.hpp
@@ -44,6 +44,10 @@
 #ifndef BOOST_MATH_SPECIAL_NEGATIVE_BINOMIAL_HPP
 #define BOOST_MATH_SPECIAL_NEGATIVE_BINOMIAL_HPP
 
+#include <boost/math/tools/config.hpp>
+#include <boost/math/tools/cstdint.hpp>
+#include <boost/math/tools/numeric_limits.hpp>
+#include <boost/math/tools/tuple.hpp>
 #include <boost/math/distributions/fwd.hpp>
 #include <boost/math/special_functions/beta.hpp> // for ibeta(a, b, x) == Ix(a, b).
 #include <boost/math/distributions/complement.hpp> // complement.
@@ -51,9 +55,7 @@
 #include <boost/math/special_functions/fpclassify.hpp> // isnan.
 #include <boost/math/tools/roots.hpp> // for root finding.
 #include <boost/math/distributions/detail/inv_discrete_quantile.hpp>
-
-#include <limits> // using std::numeric_limits;
-#include <utility>
+#include <boost/math/policies/policy.hpp>
 
 #if defined (BOOST_MSVC)
 #  pragma warning(push)
@@ -70,7 +72,7 @@ namespace boost
     {
       // Common error checking routines for negative binomial distribution functions:
       template <class RealType, class Policy>
-      inline bool check_successes(const char* function, const RealType& r, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_successes(const char* function, const RealType& r, RealType* result, const Policy& pol)
       {
         if( !(boost::math::isfinite)(r) || (r <= 0) )
         {
@@ -82,7 +84,7 @@ namespace boost
         return true;
       }
       template <class RealType, class Policy>
-      inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& pol)
       {
         if( !(boost::math::isfinite)(p) || (p < 0) || (p > 1) )
         {
@@ -94,13 +96,13 @@ namespace boost
         return true;
       }
       template <class RealType, class Policy>
-      inline bool check_dist(const char* function, const RealType& r, const RealType& p, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* function, const RealType& r, const RealType& p, RealType* result, const Policy& pol)
      {
        return check_success_fraction(function, p, result, pol)
          && check_successes(function, r, result, pol);
      }
      template <class RealType, class Policy>
-     inline bool check_dist_and_k(const char* function, const RealType& r, const RealType& p, RealType k, RealType* result, const Policy& pol)
+     BOOST_MATH_GPU_ENABLED inline bool check_dist_and_k(const char* function, const RealType& r, const RealType& p, RealType k, RealType* result, const Policy& pol)
      {
        if(check_dist(function, r, p, result, pol) == false)
        {
@@ -117,7 +119,7 @@ namespace boost
      } // Check_dist_and_k
      template <class RealType, class Policy>
-     inline bool check_dist_and_prob(const char* function, const RealType& r, RealType p, RealType prob, RealType* result, const Policy& pol)
+     BOOST_MATH_GPU_ENABLED inline bool check_dist_and_prob(const char* function, const RealType& r, RealType p, RealType prob, RealType* result, const Policy& pol)
      {
        if((check_dist(function, r, p, result, pol) && detail::check_probability(function, prob, result, pol)) == false)
        {
@@ -134,7 +136,7 @@ namespace boost
       typedef RealType value_type;
       typedef Policy policy_type;
 
-      negative_binomial_distribution(RealType r, RealType p) : m_r(r), m_p(p)
+      BOOST_MATH_GPU_ENABLED negative_binomial_distribution(RealType r, RealType p) : m_r(r), m_p(p)
       { // Constructor.
         RealType result;
         negative_binomial_detail::check_dist(
@@ -145,21 +147,21 @@ namespace boost
       } // negative_binomial_distribution constructor.
 
       // Private data getter class member functions.
-      RealType success_fraction() const
+      BOOST_MATH_GPU_ENABLED RealType success_fraction() const
      { // Probability of success as fraction in range 0 to 1.
        return m_p;
      }
-     RealType successes() const
+     BOOST_MATH_GPU_ENABLED RealType successes() const
      { // Total number of successes r.
        return m_r;
      }
 
-     static RealType find_lower_bound_on_p(
+     BOOST_MATH_GPU_ENABLED static RealType find_lower_bound_on_p(
        RealType trials,
        RealType successes,
        RealType alpha) // alpha 0.05 equivalent to 95% for one-sided test.
      {
-       static const char* function = "boost::math::negative_binomial<%1%>::find_lower_bound_on_p";
+       constexpr auto function = "boost::math::negative_binomial<%1%>::find_lower_bound_on_p";
       RealType result = 0;  // of error checks.
       RealType failures = trials - successes;
       if(false == detail::check_probability(function, alpha, &result, Policy())
@@ -179,12 +181,12 @@ namespace boost
       return ibeta_inv(successes, failures + 1, alpha, static_cast<RealType*>(nullptr), Policy());
      } // find_lower_bound_on_p
 
-     static RealType find_upper_bound_on_p(
+     BOOST_MATH_GPU_ENABLED static RealType find_upper_bound_on_p(
       RealType trials,
       RealType successes,
       RealType alpha) // alpha 0.05 equivalent to 95% for one-sided test.
      {
-      static const char* function = "boost::math::negative_binomial<%1%>::find_upper_bound_on_p";
+      constexpr auto function = "boost::math::negative_binomial<%1%>::find_upper_bound_on_p";
      RealType result = 0;  // of error checks.
      RealType failures = trials - successes;
      if(false == negative_binomial_detail::check_dist_and_k(
@@ -210,12 +212,12 @@ namespace boost
 
      // Estimate number of trials :
      // "How many trials do I need to be P% sure of seeing k or fewer failures?"
 
-     static RealType find_minimum_number_of_trials(
+     BOOST_MATH_GPU_ENABLED static RealType find_minimum_number_of_trials(
       RealType k,     // number of failures (k >= 0).
       RealType p,     // success fraction 0 <= p <= 1.
       RealType alpha) // risk level threshold 0 <= alpha <= 1.
      {
-      static const char* function = "boost::math::negative_binomial<%1%>::find_minimum_number_of_trials";
+      constexpr auto function = "boost::math::negative_binomial<%1%>::find_minimum_number_of_trials";
      // Error checks:
      RealType result = 0;
      if(false == negative_binomial_detail::check_dist_and_k(
@@ -227,12 +229,12 @@ namespace boost
      return result + k;
      } // RealType find_number_of_failures
 
-     static RealType find_maximum_number_of_trials(
+     BOOST_MATH_GPU_ENABLED static RealType find_maximum_number_of_trials(
       RealType k,     // number of failures (k >= 0).
       RealType p,     // success fraction 0 <= p <= 1.
       RealType alpha) // risk level threshold 0 <= alpha <= 1.
      {
-      static const char* function = "boost::math::negative_binomial<%1%>::find_maximum_number_of_trials";
+      constexpr auto function = "boost::math::negative_binomial<%1%>::find_maximum_number_of_trials";
      // Error checks:
      RealType result = 0;
      if(false == negative_binomial_detail::check_dist_and_k(
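// ---------------------------------------------------------------------------
// Editorial illustration (not part of the patch): the two planning helpers
// above answer "how many trials do I need?" questions.  The inputs below are
// arbitrary examples: k = 10 tolerated failures, success fraction p = 0.5,
// and a 5% risk level (alpha = 0.05, i.e. 95% confidence).
#include <boost/math/distributions/negative_binomial.hpp>
#include <cstdio>

int main()
{
    using boost::math::negative_binomial;
    double t_min = negative_binomial::find_minimum_number_of_trials(10.0, 0.5, 0.05);
    double t_max = negative_binomial::find_maximum_number_of_trials(10.0, 0.5, 0.05);
    std::printf("minimum trials: %g\nmaximum trials: %g\n", t_min, t_max);
}
// ---------------------------------------------------------------------------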
@@ -257,22 +259,22 @@ namespace boost
 #endif
 
    template <class RealType, class Policy>
-   inline const std::pair<RealType, RealType> range(const negative_binomial_distribution<RealType, Policy>& /* dist */)
+   BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const negative_binomial_distribution<RealType, Policy>& /* dist */)
    { // Range of permissible values for random variable k.
      using boost::math::tools::max_value;
-     return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // max_integer?
+     return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // max_integer?
    }
 
    template <class RealType, class Policy>
-   inline const std::pair<RealType, RealType> support(const negative_binomial_distribution<RealType, Policy>& /* dist */)
+   BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const negative_binomial_distribution<RealType, Policy>& /* dist */)
    { // Range of supported values for random variable k.
      // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
      using boost::math::tools::max_value;
-     return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // max_integer?
+     return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // max_integer?
    }
 
    template <class RealType, class Policy>
-   inline RealType mean(const negative_binomial_distribution<RealType, Policy>& dist)
+   BOOST_MATH_GPU_ENABLED inline RealType mean(const negative_binomial_distribution<RealType, Policy>& dist)
    { // Mean of Negative Binomial distribution = r(1-p)/p.
      return dist.successes() * (1 - dist.success_fraction() ) / dist.success_fraction();
    } // mean
@@ -285,14 +287,14 @@ namespace boost
    // Now implemented via quantile(half) in derived accessors.
 
    template <class RealType, class Policy>
-   inline RealType mode(const negative_binomial_distribution<RealType, Policy>& dist)
+   BOOST_MATH_GPU_ENABLED inline RealType mode(const negative_binomial_distribution<RealType, Policy>& dist)
    { // Mode of Negative Binomial distribution = floor[(r-1) * (1 - p)/p]
      BOOST_MATH_STD_USING // ADL of std functions.
      return floor((dist.successes() -1) * (1 - dist.success_fraction()) / dist.success_fraction());
    } // mode
 
    template <class RealType, class Policy>
-   inline RealType skewness(const negative_binomial_distribution<RealType, Policy>& dist)
+   BOOST_MATH_GPU_ENABLED inline RealType skewness(const negative_binomial_distribution<RealType, Policy>& dist)
    { // skewness of Negative Binomial distribution = 2-p / (sqrt(r(1-p))
      BOOST_MATH_STD_USING // ADL of std functions.
      RealType p = dist.success_fraction();
@@ -303,7 +305,7 @@ namespace boost
    } // skewness
 
    template <class RealType, class Policy>
-   inline RealType kurtosis(const negative_binomial_distribution<RealType, Policy>& dist)
+   BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const negative_binomial_distribution<RealType, Policy>& dist)
    { // kurtosis of Negative Binomial distribution
      // http://en.wikipedia.org/wiki/Negative_binomial is kurtosis_excess so add 3
      RealType p = dist.success_fraction();
@@ -312,7 +314,7 @@ namespace boost
    } // kurtosis
 
    template <class RealType, class Policy>
-   inline RealType kurtosis_excess(const negative_binomial_distribution<RealType, Policy>& dist)
+   BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const negative_binomial_distribution<RealType, Policy>& dist)
    { // kurtosis excess of Negative Binomial distribution
      // http://mathworld.wolfram.com/Kurtosis.html table of kurtosis_excess
      RealType p = dist.success_fraction();
@@ -321,7 +323,7 @@ namespace boost
    } // kurtosis_excess
 
    template <class RealType, class Policy>
-   inline RealType variance(const negative_binomial_distribution<RealType, Policy>& dist)
+   BOOST_MATH_GPU_ENABLED inline RealType variance(const negative_binomial_distribution<RealType, Policy>& dist)
    { // Variance of Negative Binomial distribution = r (1-p) / p^2.
      return dist.successes() * (1 - dist.success_fraction())
        / (dist.success_fraction() * dist.success_fraction());
@@ -335,11 +337,11 @@ namespace boost
    // chf of Negative Binomial distribution provided by derived accessors.
 
    template <class RealType, class Policy>
-   inline RealType pdf(const negative_binomial_distribution<RealType, Policy>& dist, const RealType& k)
+   BOOST_MATH_GPU_ENABLED inline RealType pdf(const negative_binomial_distribution<RealType, Policy>& dist, const RealType& k)
    { // Probability Density/Mass Function.
      BOOST_FPU_EXCEPTION_GUARD
 
-     static const char* function = "boost::math::pdf(const negative_binomial_distribution<%1%>&, %1%)";
+     constexpr auto function = "boost::math::pdf(const negative_binomial_distribution<%1%>&, %1%)";
 
      RealType r = dist.successes();
      RealType p = dist.success_fraction();
@@ -361,9 +363,9 @@ namespace boost
    } // negative_binomial_pdf
 
    template <class RealType, class Policy>
-   inline RealType cdf(const negative_binomial_distribution<RealType, Policy>& dist, const RealType& k)
+   BOOST_MATH_GPU_ENABLED inline RealType cdf(const negative_binomial_distribution<RealType, Policy>& dist, const RealType& k)
    { // Cumulative Distribution Function of Negative Binomial.
-     static const char* function = "boost::math::cdf(const negative_binomial_distribution<%1%>&, %1%)";
+     constexpr auto function = "boost::math::cdf(const negative_binomial_distribution<%1%>&, %1%)";
      using boost::math::ibeta; // Regularized incomplete beta function.
      // k argument may be integral, signed, or unsigned, or floating point.
      // If necessary, it has already been promoted from an integral type.
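// ---------------------------------------------------------------------------
// Editorial illustration (not part of the patch): the cdf overloads above
// count failures k before the r-th success; cdf and its complement sum to 1,
// and quantile inverts cdf (subject to the discrete-quantile policy).
#include <boost/math/distributions/negative_binomial.hpp>
#include <cstdio>

int main()
{
    boost::math::negative_binomial nb(5, 0.4); // r = 5 successes, p = 0.4
    const double k = 7;
    double P = boost::math::cdf(nb, k);                          // P(K <= 7)
    double Q = boost::math::cdf(boost::math::complement(nb, k)); // P(K > 7)
    std::printf("P = %g, Q = %g, P + Q = %g\n", P, Q, P + Q);
    std::printf("quantile(P) = %g\n", boost::math::quantile(nb, P)); // ~= 7
}
// ---------------------------------------------------------------------------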
@@ -387,10 +389,10 @@ namespace boost
    } // cdf Cumulative Distribution Function Negative Binomial.
 
    template <class RealType, class Policy>
-   inline RealType cdf(const complemented2_type<negative_binomial_distribution<RealType, Policy>, RealType>& c)
+   BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<negative_binomial_distribution<RealType, Policy>, RealType>& c)
    { // Complemented Cumulative Distribution Function Negative Binomial.
 
-     static const char* function = "boost::math::cdf(const negative_binomial_distribution<%1%>&, %1%)";
+     constexpr auto function = "boost::math::cdf(const negative_binomial_distribution<%1%>&, %1%)";
     using boost::math::ibetac; // Regularized incomplete beta function complement.
     // k argument may be integral, signed, or unsigned, or floating point.
     // If necessary, it has already been promoted from an integral type.
@@ -421,7 +423,7 @@ namespace boost
    } // cdf Cumulative Distribution Function Negative Binomial.
 
    template <class RealType, class Policy>
-   inline RealType quantile(const negative_binomial_distribution<RealType, Policy>& dist, const RealType& P)
+   BOOST_MATH_GPU_ENABLED inline RealType quantile(const negative_binomial_distribution<RealType, Policy>& dist, const RealType& P)
    { // Quantile, percentile/100 or Percent Point Negative Binomial function.
      // Return the number of expected failures k for a given probability p.
@@ -429,7 +431,7 @@ namespace boost
     // MathCAD pnbinom returns the smallest k such that negative_binomial(k, n, p) >= probability.
     // k argument may be integral, signed, or unsigned, or floating point.
     // BUT Cephes/CodeCogs says: finds argument p (0 to 1) such that cdf(k, n, p) = y
-    static const char* function = "boost::math::quantile(const negative_binomial_distribution<%1%>&, %1%)";
+    constexpr auto function = "boost::math::quantile(const negative_binomial_distribution<%1%>&, %1%)";
     BOOST_MATH_STD_USING // ADL of std functions.
 
     RealType p = dist.success_fraction();
@@ -484,7 +486,7 @@ namespace boost
        //
        // Cornish-Fisher Negative binomial approximation not accurate in this area:
        //
-       guess = (std::min)(RealType(r * 2), RealType(10));
+       guess = BOOST_MATH_GPU_SAFE_MIN(RealType(r * 2), RealType(10));
     }
     else
       factor = (1-P < sqrt(tools::epsilon<RealType>())) ? 2 : (guess < 20 ? 1.2f : 1.1f);
@@ -492,7 +494,7 @@ namespace boost
     //
     // Max iterations permitted:
     //
-    std::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
+    boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
     typedef typename Policy::discrete_quantile_type discrete_type;
     return detail::inverse_discrete_quantile(
       dist,
@@ -506,11 +508,11 @@ namespace boost
    } // RealType quantile(const negative_binomial_distribution dist, p)
 
    template <class RealType, class Policy>
-   inline RealType quantile(const complemented2_type<negative_binomial_distribution<RealType, Policy>, RealType>& c)
+   BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<negative_binomial_distribution<RealType, Policy>, RealType>& c)
    { // Quantile or Percent Point Negative Binomial function.
     // Return the number of expected failures k for a given
     // complement of the probability Q = 1 - P.
-    static const char* function = "boost::math::quantile(const negative_binomial_distribution<%1%>&, %1%)";
+    constexpr auto function = "boost::math::quantile(const negative_binomial_distribution<%1%>&, %1%)";
     BOOST_MATH_STD_USING
 
     // Error checks:
@@ -571,7 +573,7 @@ namespace boost
        //
        // Cornish-Fisher Negative binomial approximation not accurate in this area:
        //
-       guess = (std::min)(RealType(r * 2), RealType(10));
+       guess = BOOST_MATH_GPU_SAFE_MIN(RealType(r * 2), RealType(10));
     }
     else
       factor = (Q < sqrt(tools::epsilon<RealType>())) ? 2 : (guess < 20 ? 1.2f : 1.1f);
@@ -579,7 +581,7 @@ namespace boost
     //
     // Max iterations permitted:
     //
-    std::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
+    boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
     typedef typename Policy::discrete_quantile_type discrete_type;
     return detail::inverse_discrete_quantile(
       dist,
diff --git a/include/boost/math/distributions/non_central_beta.hpp b/include/boost/math/distributions/non_central_beta.hpp
index 66b12e870a..9dd7d5e60b 100644
--- a/include/boost/math/distributions/non_central_beta.hpp
+++ b/include/boost/math/distributions/non_central_beta.hpp
@@ -1,7 +1,7 @@
 // boost\math\distributions\non_central_beta.hpp
 
 // Copyright John Maddock 2008.
-
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -10,6 +10,10 @@
 #ifndef BOOST_MATH_SPECIAL_NON_CENTRAL_BETA_HPP
 #define BOOST_MATH_SPECIAL_NON_CENTRAL_BETA_HPP
 
+#include <boost/math/tools/config.hpp>
+#include <boost/math/tools/cstdint.hpp>
+#include <boost/math/tools/numeric_limits.hpp>
+#include <boost/math/tools/tuple.hpp>
 #include <boost/math/distributions/fwd.hpp>
 #include <boost/math/special_functions/gamma.hpp> // for incomplete gamma. gamma_q
 #include <boost/math/distributions/complement.hpp> // complements
@@ -20,6 +24,7 @@
 #include <boost/math/special_functions/fpclassify.hpp> // isnan.
 #include <boost/math/tools/roots.hpp> // for root finding.
 #include <boost/math/tools/series.hpp>
+#include <boost/math/policies/policy.hpp>
 
 namespace boost
 {
@@ -32,14 +37,14 @@ namespace boost
      namespace detail{
 
        template <class T, class Policy>
-       T non_central_beta_p(T a, T b, T lam, T x, T y, const Policy& pol, T init_val = 0)
+       BOOST_MATH_GPU_ENABLED T non_central_beta_p(T a, T b, T lam, T x, T y, const Policy& pol, T init_val = 0)
        {
          BOOST_MATH_STD_USING
         using namespace boost::math;
         //
         // Variables come first:
         //
-        std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
+        boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
        T errtol = boost::math::policies::get_epsilon<T, Policy>();
        T l2 = lam / 2;
        //
@@ -86,7 +91,7 @@ namespace boost
        // direction for recursion:
        //
        T last_term = 0;
-       std::uintmax_t count = k;
+       boost::math::uintmax_t count = k;
        for(auto i = k; i >= 0; --i)
        {
           T term = beta * pois;
@@ -120,7 +125,7 @@ namespace boost
             break;
          }
          last_term = term;
-         if(static_cast<std::uintmax_t>(count + i - k) > max_iter)
+         if(static_cast<boost::math::uintmax_t>(count + i - k) > max_iter)
          {
            return policies::raise_evaluation_error("cdf(non_central_beta_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE
          }
@@ -129,14 +134,14 @@ namespace boost
        }
 
       template <class T, class Policy>
-      T non_central_beta_q(T a, T b, T lam, T x, T y, const Policy& pol, T init_val = 0)
+      BOOST_MATH_GPU_ENABLED T non_central_beta_q(T a, T b, T lam, T x, T y, const Policy& pol, T init_val = 0)
      {
        BOOST_MATH_STD_USING
       using namespace boost::math;
       //
       // Variables come first:
       //
-      std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
+      boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
      T errtol = boost::math::policies::get_epsilon<T, Policy>();
      T l2 = lam / 2;
      //
@@ -185,7 +190,7 @@ namespace boost
      // of the bulk of the sum:
      //
      T last_term = 0;
-     std::uintmax_t count = 0;
+     boost::math::uintmax_t count = 0;
      for(auto i = k + 1; ; ++i)
      {
        poisf *= l2 / i;
@@ -199,7 +204,7 @@ namespace boost
          count = i - k;
          break;
        }
-       if(static_cast<std::uintmax_t>(i - k) > max_iter)
+       if(static_cast<boost::math::uintmax_t>(i - k) > max_iter)
        {
          return policies::raise_evaluation_error("cdf(non_central_beta_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE
        }
@@ -213,7 +218,7 @@ namespace boost
        {
          break;
        }
-       if(static_cast<std::uintmax_t>(count + k - i) > max_iter)
+       if(static_cast<boost::math::uintmax_t>(count + k - i) > max_iter)
        {
          return policies::raise_evaluation_error("cdf(non_central_beta_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE
        }
@@ -228,7 +233,7 @@ namespace boost
      }
 
     template <class RealType, class Policy>
-    inline RealType non_central_beta_cdf(RealType x, RealType y, RealType a, RealType b, RealType l, bool invert, const Policy&)
+    BOOST_MATH_GPU_ENABLED inline RealType non_central_beta_cdf(RealType x, RealType y, RealType a, RealType b, RealType l, bool invert, const Policy&)
    {
      typedef typename policies::evaluation<RealType, Policy>::type value_type;
      typedef typename policies::normalise<
@@ -283,10 +288,10 @@ namespace boost
    template <class T, class Policy>
    struct nc_beta_quantile_functor
    {
-      nc_beta_quantile_functor(const non_central_beta_distribution<T, Policy>& d, T t, bool c)
+      BOOST_MATH_GPU_ENABLED nc_beta_quantile_functor(const non_central_beta_distribution<T, Policy>& d, T t, bool c)
        : dist(d), target(t), comp(c) {}
 
-      T operator()(const T& x)
+      BOOST_MATH_GPU_ENABLED T operator()(const T& x)
      {
        return comp
          ? T(target - cdf(complement(dist, x)))
@@ -305,10 +310,10 @@ namespace boost
    // heuristics.
    //
    template <class F, class T, class Tol, class Policy>
-   std::pair<T, T> bracket_and_solve_root_01(F f, const T& guess, T factor, bool rising, Tol tol, std::uintmax_t& max_iter, const Policy& pol)
+   BOOST_MATH_GPU_ENABLED boost::math::pair<T, T> bracket_and_solve_root_01(F f, const T& guess, T factor, bool rising, Tol tol, boost::math::uintmax_t& max_iter, const Policy& pol)
    {
      BOOST_MATH_STD_USING
-     static const char* function = "boost::math::tools::bracket_and_solve_root_01<%1%>";
+     constexpr auto function = "boost::math::tools::bracket_and_solve_root_01<%1%>";
     //
     // Set up initial brackets:
     //
@@ -319,7 +324,7 @@ namespace boost
     //
     // Set up invocation count:
     //
-    std::uintmax_t count = max_iter - 1;
+    boost::math::uintmax_t count = max_iter - 1;
 
     if((fa < 0) == (guess < 0 ? !rising : rising))
     {
@@ -332,7 +337,7 @@ namespace boost
        if(count == 0)
        {
          b = policies::raise_evaluation_error(function, "Unable to bracket root, last nearest value was %1%", b, pol); // LCOV_EXCL_LINE
-         return std::make_pair(a, b);
+         return boost::math::make_pair(a, b);
        }
       //
       // Heuristic: every 20 iterations we double the growth factor in case the
@@ -365,12 +370,12 @@ namespace boost
          // Escape route just in case the answer is zero!
          max_iter -= count;
          max_iter += 1;
-         return a > 0 ? std::make_pair(T(0), T(a)) : std::make_pair(T(a), T(0));
+         return a > 0 ? boost::math::make_pair(T(0), T(a)) : boost::math::make_pair(T(a), T(0));
       }
       if(count == 0)
       {
         a = policies::raise_evaluation_error(function, "Unable to bracket root, last nearest value was %1%", a, pol); // LCOV_EXCL_LINE
-        return std::make_pair(a, b);
+        return boost::math::make_pair(a, b);
       }
      //
      // Heuristic: every 20 iterations we double the growth factor in case the
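// ---------------------------------------------------------------------------
// Editorial illustration (not part of the patch): the shape of the bracketing
// loop above -- march one endpoint geometrically until the function changes
// sign, doubling the growth factor every 20 probes so that hard cases cannot
// stall.  A minimal standalone analogue (all names here are invented):
#include <cmath>
#include <cstdio>

int main()
{
    auto f = [](double x) { return std::log(x) + 5; }; // sign change at exp(-5)
    double factor = 2;
    double a = 0.25, b = a;
    double fa = f(a);
    for (int step = 1; fa > 0; ++step) {
        if (step % 20 == 0) factor *= 2; // the "every 20 iterations" heuristic
        b = a;        // previous endpoint becomes the far side of the bracket
        a /= factor;  // grow the bracket downward toward zero
        fa = f(a);
    }
    std::printf("bracket: [%g, %g]\n", a, b); // contains exp(-5) ~= 6.7e-3
}
// ---------------------------------------------------------------------------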
a : b), @@ -406,9 +411,9 @@ namespace boost } template - RealType nc_beta_quantile(const non_central_beta_distribution& dist, const RealType& p, bool comp) + BOOST_MATH_GPU_ENABLED RealType nc_beta_quantile(const non_central_beta_distribution& dist, const RealType& p, bool comp) { - static const char* function = "quantile(non_central_beta_distribution<%1%>, %1%)"; + constexpr auto function = "quantile(non_central_beta_distribution<%1%>, %1%)"; typedef typename policies::evaluation::type value_type; typedef typename policies::normalise< Policy, @@ -505,9 +510,9 @@ namespace boost detail::nc_beta_quantile_functor f(non_central_beta_distribution(a, b, l), p, comp); tools::eps_tolerance tol(policies::digits()); - std::uintmax_t max_iter = policies::get_max_root_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_root_iterations(); - std::pair ir + boost::math::pair ir = bracket_and_solve_root_01( f, guess, value_type(2.5), true, tol, max_iter, Policy()); @@ -530,7 +535,7 @@ namespace boost } template - T non_central_beta_pdf(T a, T b, T lam, T x, T y, const Policy& pol) + BOOST_MATH_GPU_ENABLED T non_central_beta_pdf(T a, T b, T lam, T x, T y, const Policy& pol) { BOOST_MATH_STD_USING // @@ -541,7 +546,7 @@ namespace boost // // Variables come first: // - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_series_iterations(); T errtol = boost::math::policies::get_epsilon(); T l2 = lam / 2; // @@ -580,7 +585,7 @@ namespace boost // // Stable backwards recursion first: // - std::uintmax_t count = k; + boost::math::uintmax_t count = k; T ratio = 0; T old_ratio = 0; for(auto i = k; i >= 0; --i) @@ -615,7 +620,7 @@ namespace boost break; } old_ratio = ratio; - if(static_cast(count + i - k) > max_iter) + if(static_cast(count + i - k) > max_iter) { return policies::raise_evaluation_error("pdf(non_central_beta_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE } @@ -624,10 +629,10 @@ namespace boost } template - RealType nc_beta_pdf(const non_central_beta_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED RealType nc_beta_pdf(const non_central_beta_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING - static const char* function = "pdf(non_central_beta_distribution<%1%>, %1%)"; + constexpr auto function = "pdf(non_central_beta_distribution<%1%>, %1%)"; typedef typename policies::evaluation::type value_type; typedef typename policies::normalise< Policy, @@ -672,8 +677,8 @@ namespace boost struct hypergeometric_2F2_sum { typedef T result_type; - hypergeometric_2F2_sum(T a1_, T a2_, T b1_, T b2_, T z_) : a1(a1_), a2(a2_), b1(b1_), b2(b2_), z(z_), term(1), k(0) {} - T operator()() + BOOST_MATH_GPU_ENABLED hypergeometric_2F2_sum(T a1_, T a2_, T b1_, T b2_, T z_) : a1(a1_), a2(a2_), b1(b1_), b2(b2_), z(z_), term(1), k(0) {} + BOOST_MATH_GPU_ENABLED T operator()() { T result = term; term *= a1 * a2 / (b1 * b2); @@ -690,14 +695,14 @@ namespace boost }; template - T hypergeometric_2F2(T a1, T a2, T b1, T b2, T z, const Policy& pol) + BOOST_MATH_GPU_ENABLED T hypergeometric_2F2(T a1, T a2, T b1, T b2, T z, const Policy& pol) { typedef typename policies::evaluation::type value_type; const char* function = "boost::math::detail::hypergeometric_2F2<%1%>(%1%,%1%,%1%,%1%,%1%)"; hypergeometric_2F2_sum s(a1, a2, b1, b2, z); - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = 
policies::get_max_series_iterations(); value_type result = boost::math::tools::sum_series(s, boost::math::policies::get_epsilon(), max_iter); @@ -714,7 +719,7 @@ namespace boost typedef RealType value_type; typedef Policy policy_type; - non_central_beta_distribution(RealType a_, RealType b_, RealType lambda) : a(a_), b(b_), ncp(lambda) + BOOST_MATH_GPU_ENABLED non_central_beta_distribution(RealType a_, RealType b_, RealType lambda) : a(a_), b(b_), ncp(lambda) { const char* function = "boost::math::non_central_beta_distribution<%1%>::non_central_beta_distribution(%1%,%1%)"; RealType r; @@ -731,15 +736,15 @@ namespace boost Policy()); } // non_central_beta_distribution constructor. - RealType alpha() const + BOOST_MATH_GPU_ENABLED RealType alpha() const { // Private data getter function. return a; } - RealType beta() const + BOOST_MATH_GPU_ENABLED RealType beta() const { // Private data getter function. return b; } - RealType non_centrality() const + BOOST_MATH_GPU_ENABLED RealType non_centrality() const { // Private data getter function. return ncp; } @@ -760,24 +765,24 @@ namespace boost // Non-member functions to give properties of the distribution. template - inline const std::pair range(const non_central_beta_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair range(const non_central_beta_distribution& /* dist */) { // Range of permissible values for random variable k. using boost::math::tools::max_value; - return std::pair(static_cast(0), static_cast(1)); + return boost::math::pair(static_cast(0), static_cast(1)); } template - inline const std::pair support(const non_central_beta_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair support(const non_central_beta_distribution& /* dist */) { // Range of supported values for random variable k. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. using boost::math::tools::max_value; - return std::pair(static_cast(0), static_cast(1)); + return boost::math::pair(static_cast(0), static_cast(1)); } template - inline RealType mode(const non_central_beta_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mode(const non_central_beta_distribution& dist) { // mode. - static const char* function = "mode(non_central_beta_distribution<%1%> const&)"; + constexpr auto function = "mode(non_central_beta_distribution<%1%> const&)"; RealType a = dist.alpha(); RealType b = dist.beta(); @@ -812,7 +817,7 @@ namespace boost // later: // template - inline RealType mean(const non_central_beta_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mean(const non_central_beta_distribution& dist) { BOOST_MATH_STD_USING RealType a = dist.alpha(); @@ -823,7 +828,7 @@ namespace boost } // mean template - inline RealType variance(const non_central_beta_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType variance(const non_central_beta_distribution& dist) { // // Relative error of this function may be arbitrarily large... absolute @@ -843,41 +848,41 @@ namespace boost // RealType standard_deviation(const non_central_beta_distribution& dist) // standard_deviation provided by derived accessors. template - inline RealType skewness(const non_central_beta_distribution& /*dist*/) + BOOST_MATH_GPU_ENABLED inline RealType skewness(const non_central_beta_distribution& /*dist*/) { // skewness = sqrt(l). 
const char* function = "boost::math::non_central_beta_distribution<%1%>::skewness()"; typedef typename Policy::assert_undefined_type assert_type; - static_assert(assert_type::value == 0, "Assert type is undefined."); + static_assert(assert_type::value == 0, "The Non Central Beta Distribution has no skewness."); return policies::raise_evaluation_error(function, "This function is not yet implemented, the only sensible result is %1%.", // LCOV_EXCL_LINE - std::numeric_limits::quiet_NaN(), Policy()); // infinity? LCOV_EXCL_LINE + boost::math::numeric_limits::quiet_NaN(), Policy()); // infinity? LCOV_EXCL_LINE } template - inline RealType kurtosis_excess(const non_central_beta_distribution& /*dist*/) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const non_central_beta_distribution& /*dist*/) { const char* function = "boost::math::non_central_beta_distribution<%1%>::kurtosis_excess()"; typedef typename Policy::assert_undefined_type assert_type; - static_assert(assert_type::value == 0, "Assert type is undefined."); + static_assert(assert_type::value == 0, "The Non Central Beta Distribution has no kurtosis excess."); return policies::raise_evaluation_error(function, "This function is not yet implemented, the only sensible result is %1%.", // LCOV_EXCL_LINE - std::numeric_limits::quiet_NaN(), Policy()); // infinity? LCOV_EXCL_LINE + boost::math::numeric_limits::quiet_NaN(), Policy()); // infinity? LCOV_EXCL_LINE } // kurtosis_excess template - inline RealType kurtosis(const non_central_beta_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const non_central_beta_distribution& dist) { return kurtosis_excess(dist) + 3; } template - inline RealType pdf(const non_central_beta_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType pdf(const non_central_beta_distribution& dist, const RealType& x) { // Probability Density/Mass Function. return detail::nc_beta_pdf(dist, x); } // pdf template - RealType cdf(const non_central_beta_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED RealType cdf(const non_central_beta_distribution& dist, const RealType& x) { const char* function = "boost::math::non_central_beta_distribution<%1%>::cdf(%1%)"; RealType a = dist.alpha(); @@ -912,7 +917,7 @@ namespace boost } // cdf template - RealType cdf(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { // Complemented Cumulative Distribution Function const char* function = "boost::math::non_central_beta_distribution<%1%>::cdf(%1%)"; non_central_beta_distribution const& dist = c.dist; @@ -949,13 +954,13 @@ namespace boost } // ccdf template - inline RealType quantile(const non_central_beta_distribution& dist, const RealType& p) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const non_central_beta_distribution& dist, const RealType& p) { // Quantile (or Percent Point) function. return detail::nc_beta_quantile(dist, p, false); } // quantile template - inline RealType quantile(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { // Quantile (or Percent Point) function. return detail::nc_beta_quantile(c.dist, c.param, true); } // quantile complement. 
diff --git a/include/boost/math/distributions/non_central_chi_squared.hpp b/include/boost/math/distributions/non_central_chi_squared.hpp
index f59be9932c..5917b3732d 100644
--- a/include/boost/math/distributions/non_central_chi_squared.hpp
+++ b/include/boost/math/distributions/non_central_chi_squared.hpp
@@ -1,7 +1,7 @@
 // boost\math\distributions\non_central_chi_squared.hpp
 
 // Copyright John Maddock 2008.
-
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -10,6 +10,10 @@
 #ifndef BOOST_MATH_SPECIAL_NON_CENTRAL_CHI_SQUARE_HPP
 #define BOOST_MATH_SPECIAL_NON_CENTRAL_CHI_SQUARE_HPP
 
+#include
+#include
+#include
+#include
 #include
 #include // for incomplete gamma. gamma_q
 #include // for cyl_bessel_i
@@ -21,6 +25,7 @@
 #include // for root finding.
 #include
 #include
+#include
 
 namespace boost
 {
@@ -33,7 +38,7 @@ namespace boost
      namespace detail{

         template <class T, class Policy>
-         T non_central_chi_square_q(T x, T f, T theta, const Policy& pol, T init_sum = 0)
+         BOOST_MATH_GPU_ENABLED T non_central_chi_square_q(T x, T f, T theta, const Policy& pol, T init_sum = 0)
         {
            //
            // Computes the complement of the Non-Central Chi-Square
@@ -62,7 +67,7 @@ namespace boost
            T lambda = theta / 2;
            T del = f / 2;
            T y = x / 2;
-            std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
+            boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
            T errtol = boost::math::policies::get_epsilon<T, Policy>();
            T sum = init_sum;
            //
@@ -89,7 +94,7 @@ namespace boost
            // recurrences:
            //
            long long i;
-            for(i = k; static_cast<std::uintmax_t>(i-k) < max_iter; ++i)
+            for(i = k; static_cast<boost::math::uintmax_t>(i-k) < max_iter; ++i)
            {
               T term = poisf * gamf;
               sum += term;
@@ -100,7 +105,7 @@ namespace boost
                  break;
            }
            //Error check:
-            if(static_cast<std::uintmax_t>(i-k) >= max_iter)
+            if(static_cast<boost::math::uintmax_t>(i-k) >= max_iter)
               return policies::raise_evaluation_error("cdf(non_central_chi_squared_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE
            //
            // Now backwards iteration: the gamma
@@ -126,7 +131,7 @@ namespace boost
         }

         template <class T, class Policy>
-         T non_central_chi_square_p_ding(T x, T f, T theta, const Policy& pol, T init_sum = 0)
+         BOOST_MATH_GPU_ENABLED T non_central_chi_square_p_ding(T x, T f, T theta, const Policy& pol, T init_sum = 0)
         {
            //
            // This is an implementation of:
@@ -155,12 +160,12 @@ namespace boost
            if(sum == 0)
               return sum;
-            std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
+            boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
            T errtol = boost::math::policies::get_epsilon<T, Policy>();

            int i;
            T lterm(0), term(0);
-            for(i = 1; static_cast<std::uintmax_t>(i) < max_iter; ++i)
+            for(i = 1; static_cast<boost::math::uintmax_t>(i) < max_iter; ++i)
            {
               tk = tk * x / (f + 2 * i);
               uk = uk * lambda / i;
@@ -172,14 +177,14 @@ namespace boost
                  break;
            }
            //Error check:
-            if(static_cast<std::uintmax_t>(i) >= max_iter)
+            if(static_cast<boost::math::uintmax_t>(i) >= max_iter)
               return policies::raise_evaluation_error("cdf(non_central_chi_squared_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE
            return sum;
         }

         template <class T, class Policy>
-         T non_central_chi_square_p(T y, T n, T lambda, const Policy& pol, T init_sum)
+         BOOST_MATH_GPU_ENABLED T non_central_chi_square_p(T y, T n, T lambda, const Policy& pol, T init_sum)
         {
            //
            // This is taken more or less directly from:
@@ -198,7 +203,7 @@ namespace boost
            // Special case:
            if(y == 0)
               return 0;
-            std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
+            boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
            T errtol = boost::math::policies::get_epsilon<T, Policy>();
            T errorf(0), errorb(0);
@@ -266,23 +271,23 @@ namespace boost
               errorf = poiskf * gamkf;
               sum += errorf;
               ++i;
-            }while((fabs(errorf / sum) > errtol) && (static_cast<std::uintmax_t>(i) < max_iter));
+            }while((fabs(errorf / sum) > errtol) && (static_cast<boost::math::uintmax_t>(i) < max_iter));

            //Error check:
-            if(static_cast<std::uintmax_t>(i) >= max_iter)
+            if(static_cast<boost::math::uintmax_t>(i) >= max_iter)
               return policies::raise_evaluation_error("cdf(non_central_chi_squared_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE

            return sum;
         }

         template <class T, class Policy>
-         T non_central_chi_square_pdf(T x, T n, T lambda, const Policy& pol)
+         BOOST_MATH_GPU_ENABLED T non_central_chi_square_pdf(T x, T n, T lambda, const Policy& pol)
         {
            //
            // As above but for the PDF:
            //
            BOOST_MATH_STD_USING
-            std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
+            boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
            T errtol = boost::math::policies::get_epsilon<T, Policy>();
            T x2 = x / 2;
            T n2 = n / 2;
@@ -298,7 +303,7 @@ namespace boost
               sum += pois;
               if(pois / sum < errtol)
                  break;
-               if(static_cast<std::uintmax_t>(i - k) >= max_iter)
+               if(static_cast<boost::math::uintmax_t>(i - k) >= max_iter)
                  return policies::raise_evaluation_error("pdf(non_central_chi_squared_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE
               pois *= l2 * x2 / ((i + 1) * (n2 + i));
            }
@@ -313,7 +318,7 @@ namespace boost
         }

         template <class RealType, class Policy>
-         inline RealType non_central_chi_squared_cdf(RealType x, RealType k, RealType l, bool invert, const Policy&)
+         BOOST_MATH_GPU_ENABLED inline RealType non_central_chi_squared_cdf(RealType x, RealType k, RealType l, bool invert, const Policy&)
         {
            typedef typename policies::evaluation<RealType, Policy>::type value_type;
            typedef typename policies::normalise<
@@ -373,10 +378,10 @@ namespace boost
         template <class T, class Policy>
         struct nccs_quantile_functor
         {
-            nccs_quantile_functor(const non_central_chi_squared_distribution<T, Policy>& d, T t, bool c)
+            BOOST_MATH_GPU_ENABLED nccs_quantile_functor(const non_central_chi_squared_distribution<T, Policy>& d, T t, bool c)
               : dist(d), target(t), comp(c) {}

-            T operator()(const T& x)
+            BOOST_MATH_GPU_ENABLED T operator()(const T& x)
            {
               return comp ?
                  target - cdf(complement(dist, x))
@@ -390,10 +395,10 @@ namespace boost
         };

         template <class RealType, class Policy>
-         RealType nccs_quantile(const non_central_chi_squared_distribution<RealType, Policy>& dist, const RealType& p, bool comp)
+         BOOST_MATH_GPU_ENABLED RealType nccs_quantile(const non_central_chi_squared_distribution<RealType, Policy>& dist, const RealType& p, bool comp)
         {
            BOOST_MATH_STD_USING
-            static const char* function = "quantile(non_central_chi_squared_distribution<%1%>, %1%)";
+            constexpr auto function = "quantile(non_central_chi_squared_distribution<%1%>, %1%)";
            typedef typename policies::evaluation<RealType, Policy>::type value_type;
            typedef typename policies::normalise<
               Policy,
@@ -481,10 +486,10 @@ namespace boost
         }

         template <class RealType, class Policy>
-         RealType nccs_pdf(const non_central_chi_squared_distribution<RealType, Policy>& dist, const RealType& x)
+         BOOST_MATH_GPU_ENABLED RealType nccs_pdf(const non_central_chi_squared_distribution<RealType, Policy>& dist, const RealType& x)
         {
            BOOST_MATH_STD_USING
-            static const char* function = "pdf(non_central_chi_squared_distribution<%1%>, %1%)";
+            constexpr auto function = "pdf(non_central_chi_squared_distribution<%1%>, %1%)";
            typedef typename policies::evaluation<RealType, Policy>::type value_type;
            typedef typename policies::normalise<
               Policy,
@@ -545,11 +550,11 @@ namespace boost
         template <class RealType, class Policy>
         struct degrees_of_freedom_finder
         {
-            degrees_of_freedom_finder(
+            BOOST_MATH_GPU_ENABLED degrees_of_freedom_finder(
               RealType lam_, RealType x_, RealType p_, bool c)
               : lam(lam_), x(x_), p(p_), comp(c) {}

-            RealType operator()(const RealType& v)
+            BOOST_MATH_GPU_ENABLED RealType operator()(const RealType& v)
            {
               non_central_chi_squared_distribution<RealType, Policy> d(v, lam);
               return comp ?
@@ -564,21 +569,21 @@ namespace boost
         };

         template <class RealType, class Policy>
-         inline RealType find_degrees_of_freedom(
+         BOOST_MATH_GPU_ENABLED inline RealType find_degrees_of_freedom(
            RealType lam, RealType x, RealType p, RealType q, const Policy& pol)
         {
-            const char* function = "non_central_chi_squared<%1%>::find_degrees_of_freedom";
+            constexpr auto function = "non_central_chi_squared<%1%>::find_degrees_of_freedom";
            if((p == 0) || (q == 0))
            {
               //
               // Can't do a thing if one of p and q is zero:
               //
               return policies::raise_evaluation_error(function, "Can't find degrees of freedom when the probability is 0 or 1, only possible answer is %1%", // LCOV_EXCL_LINE
-                  RealType(std::numeric_limits<RealType>::quiet_NaN()), Policy()); // LCOV_EXCL_LINE
+                  RealType(boost::math::numeric_limits<RealType>::quiet_NaN()), Policy()); // LCOV_EXCL_LINE
            }
            degrees_of_freedom_finder<RealType, Policy> f(lam, x, p < q ? p : q, p < q ? false : true);
            tools::eps_tolerance<RealType> tol(policies::digits<RealType, Policy>());
-            std::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
+            boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
            //
            // Pick an initial guess that we know will give us a probability
            // right around 0.5.
@@ -586,7 +591,7 @@ namespace boost
            RealType guess = x - lam;
            if(guess < 1)
               guess = 1;
-            std::pair<RealType, RealType> ir = tools::bracket_and_solve_root(
+            boost::math::pair<RealType, RealType> ir = tools::bracket_and_solve_root(
               f, guess, RealType(2), false, tol, max_iter, pol);
            RealType result = ir.first + (ir.second - ir.first) / 2;
            if(max_iter >= policies::get_max_root_iterations<Policy>())
@@ -600,11 +605,11 @@ namespace boost
         template <class RealType, class Policy>
         struct non_centrality_finder
         {
-            non_centrality_finder(
+            BOOST_MATH_GPU_ENABLED non_centrality_finder(
               RealType v_, RealType x_, RealType p_, bool c)
               : v(v_), x(x_), p(p_), comp(c) {}

-            RealType operator()(const RealType& lam)
+            BOOST_MATH_GPU_ENABLED RealType operator()(const RealType& lam)
            {
               non_central_chi_squared_distribution<RealType, Policy> d(v, lam);
               return comp ?
@@ -619,21 +624,21 @@ namespace boost
         };

         template <class RealType, class Policy>
-         inline RealType find_non_centrality(
+         BOOST_MATH_GPU_ENABLED inline RealType find_non_centrality(
            RealType v, RealType x, RealType p, RealType q, const Policy& pol)
         {
-            const char* function = "non_central_chi_squared<%1%>::find_non_centrality";
+            constexpr auto function = "non_central_chi_squared<%1%>::find_non_centrality";
            if((p == 0) || (q == 0))
            {
               //
               // Can't do a thing if one of p and q is zero:
               //
               return policies::raise_evaluation_error(function, "Can't find non centrality parameter when the probability is 0 or 1, only possible answer is %1%", // LCOV_EXCL_LINE
-                  RealType(std::numeric_limits<RealType>::quiet_NaN()), Policy()); // LCOV_EXCL_LINE
+                  RealType(boost::math::numeric_limits<RealType>::quiet_NaN()), Policy()); // LCOV_EXCL_LINE
            }
            non_centrality_finder<RealType, Policy> f(v, x, p < q ? p : q, p < q ? false : true);
            tools::eps_tolerance<RealType> tol(policies::digits<RealType, Policy>());
-            std::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
+            boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
            //
            // Pick an initial guess that we know will give us a probability
            // right around 0.5.
@@ -641,7 +646,7 @@ namespace boost
            RealType guess = x - v;
            if(guess < 1)
               guess = 1;
-            std::pair<RealType, RealType> ir = tools::bracket_and_solve_root(
+            boost::math::pair<RealType, RealType> ir = tools::bracket_and_solve_root(
               f, guess, RealType(2), false, tol, max_iter, pol);
            RealType result = ir.first + (ir.second - ir.first) / 2;
            if(max_iter >= policies::get_max_root_iterations<Policy>())
@@ -661,9 +666,9 @@ namespace boost
         typedef RealType value_type;
         typedef Policy policy_type;

-         non_central_chi_squared_distribution(RealType df_, RealType lambda) : df(df_), ncp(lambda)
+         BOOST_MATH_GPU_ENABLED non_central_chi_squared_distribution(RealType df_, RealType lambda) : df(df_), ncp(lambda)
         {
-            const char* function = "boost::math::non_central_chi_squared_distribution<%1%>::non_central_chi_squared_distribution(%1%,%1%)";
+            constexpr auto function = "boost::math::non_central_chi_squared_distribution<%1%>::non_central_chi_squared_distribution(%1%,%1%)";
            RealType r;
            detail::check_df(
               function,
@@ -675,17 +680,17 @@ namespace boost
               Policy());
         } // non_central_chi_squared_distribution constructor.

-         RealType degrees_of_freedom() const
+         BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom() const
         { // Private data getter function.
            return df;
         }
-         RealType non_centrality() const
+         BOOST_MATH_GPU_ENABLED RealType non_centrality() const
         { // Private data getter function.
            return ncp;
         }
-         static RealType find_degrees_of_freedom(RealType lam, RealType x, RealType p)
+         BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(RealType lam, RealType x, RealType p)
         {
-            const char* function = "non_central_chi_squared<%1%>::find_degrees_of_freedom";
+            constexpr auto function = "non_central_chi_squared<%1%>::find_degrees_of_freedom";
            typedef typename policies::evaluation<RealType, Policy>::type eval_type;
            typedef typename policies::normalise<
               Policy,
@@ -704,9 +709,9 @@ namespace boost
               function);
         }
         template <class A, class B, class C>
-         static RealType find_degrees_of_freedom(const complemented3_type<A, B, C>& c)
+         BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(const complemented3_type<A, B, C>& c)
         {
-            const char* function = "non_central_chi_squared<%1%>::find_degrees_of_freedom";
+            constexpr auto function = "non_central_chi_squared<%1%>::find_degrees_of_freedom";
            typedef typename policies::evaluation<RealType, Policy>::type eval_type;
            typedef typename policies::normalise<
               Policy,
@@ -724,9 +729,9 @@ namespace boost
               result,
               function);
         }
-         static RealType find_non_centrality(RealType v, RealType x, RealType p)
+         BOOST_MATH_GPU_ENABLED static RealType find_non_centrality(RealType v, RealType x, RealType p)
         {
-            const char* function = "non_central_chi_squared<%1%>::find_non_centrality";
+            constexpr auto function = "non_central_chi_squared<%1%>::find_non_centrality";
            typedef typename policies::evaluation<RealType, Policy>::type eval_type;
            typedef typename policies::normalise<
               Policy,
@@ -745,9 +750,9 @@ namespace boost
               function);
         }
         template <class A, class B, class C>
-         static RealType find_non_centrality(const complemented3_type<A, B, C>& c)
+         BOOST_MATH_GPU_ENABLED static RealType find_non_centrality(const complemented3_type<A, B, C>& c)
         {
-            const char* function = "non_central_chi_squared<%1%>::find_non_centrality";
+            constexpr auto function = "non_central_chi_squared<%1%>::find_non_centrality";
            typedef typename policies::evaluation<RealType, Policy>::type eval_type;
            typedef typename policies::normalise<
               Policy,
@@ -781,24 +786,24 @@ namespace boost
      // Non-member functions to give properties of the distribution.

      template <class RealType, class Policy>
-      inline const std::pair<RealType, RealType> range(const non_central_chi_squared_distribution<RealType, Policy>& /* dist */)
+      BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const non_central_chi_squared_distribution<RealType, Policy>& /* dist */)
      { // Range of permissible values for random variable k.
         using boost::math::tools::max_value;
-         return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // Max integer?
+         return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // Max integer?
      }

      template <class RealType, class Policy>
-      inline const std::pair<RealType, RealType> support(const non_central_chi_squared_distribution<RealType, Policy>& /* dist */)
+      BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const non_central_chi_squared_distribution<RealType, Policy>& /* dist */)
      { // Range of supported values for random variable k.
        // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
         using boost::math::tools::max_value;
-         return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
+         return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
      }

      template <class RealType, class Policy>
-      inline RealType mean(const non_central_chi_squared_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType mean(const non_central_chi_squared_distribution<RealType, Policy>& dist)
      { // Mean of poisson distribution = lambda.
-         const char* function = "boost::math::non_central_chi_squared_distribution<%1%>::mean()";
+         constexpr auto function = "boost::math::non_central_chi_squared_distribution<%1%>::mean()";
         RealType k = dist.degrees_of_freedom();
         RealType l = dist.non_centrality();
         RealType r;
@@ -816,9 +821,9 @@ namespace boost
      } // mean

      template <class RealType, class Policy>
-      inline RealType mode(const non_central_chi_squared_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType mode(const non_central_chi_squared_distribution<RealType, Policy>& dist)
      { // mode.
-         static const char* function = "mode(non_central_chi_squared_distribution<%1%> const&)";
+         constexpr auto function = "mode(non_central_chi_squared_distribution<%1%> const&)";
         RealType k = dist.degrees_of_freedom();
         RealType l = dist.non_centrality();
@@ -839,9 +844,9 @@ namespace boost
      }

      template <class RealType, class Policy>
-      inline RealType variance(const non_central_chi_squared_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType variance(const non_central_chi_squared_distribution<RealType, Policy>& dist)
      { // variance.
-         const char* function = "boost::math::non_central_chi_squared_distribution<%1%>::variance()";
+         constexpr auto function = "boost::math::non_central_chi_squared_distribution<%1%>::variance()";
         RealType k = dist.degrees_of_freedom();
         RealType l = dist.non_centrality();
         RealType r;
@@ -862,9 +867,9 @@ namespace boost
      // standard_deviation provided by derived accessors.

      template <class RealType, class Policy>
-      inline RealType skewness(const non_central_chi_squared_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType skewness(const non_central_chi_squared_distribution<RealType, Policy>& dist)
      { // skewness = sqrt(l).
-         const char* function = "boost::math::non_central_chi_squared_distribution<%1%>::skewness()";
+         constexpr auto function = "boost::math::non_central_chi_squared_distribution<%1%>::skewness()";
         RealType k = dist.degrees_of_freedom();
         RealType l = dist.non_centrality();
         RealType r;
@@ -883,9 +888,9 @@ namespace boost
      }

      template <class RealType, class Policy>
-      inline RealType kurtosis_excess(const non_central_chi_squared_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const non_central_chi_squared_distribution<RealType, Policy>& dist)
      {
-         const char* function = "boost::math::non_central_chi_squared_distribution<%1%>::kurtosis_excess()";
+         constexpr auto function = "boost::math::non_central_chi_squared_distribution<%1%>::kurtosis_excess()";
         RealType k = dist.degrees_of_freedom();
         RealType l = dist.non_centrality();
         RealType r;
@@ -903,21 +908,21 @@ namespace boost
      } // kurtosis_excess

      template <class RealType, class Policy>
-      inline RealType kurtosis(const non_central_chi_squared_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const non_central_chi_squared_distribution<RealType, Policy>& dist)
      {
         return kurtosis_excess(dist) + 3;
      }

      template <class RealType, class Policy>
-      inline RealType pdf(const non_central_chi_squared_distribution<RealType, Policy>& dist, const RealType& x)
+      BOOST_MATH_GPU_ENABLED inline RealType pdf(const non_central_chi_squared_distribution<RealType, Policy>& dist, const RealType& x)
      { // Probability Density/Mass Function.
         return detail::nccs_pdf(dist, x);
      } // pdf

      template <class RealType, class Policy>
-      RealType cdf(const non_central_chi_squared_distribution<RealType, Policy>& dist, const RealType& x)
+      BOOST_MATH_GPU_ENABLED RealType cdf(const non_central_chi_squared_distribution<RealType, Policy>& dist, const RealType& x)
      {
-         const char* function = "boost::math::non_central_chi_squared_distribution<%1%>::cdf(%1%)";
+         constexpr auto function = "boost::math::non_central_chi_squared_distribution<%1%>::cdf(%1%)";
         RealType k = dist.degrees_of_freedom();
         RealType l = dist.non_centrality();
         RealType r;
@@ -942,9 +947,9 @@ namespace boost
      } // cdf

      template <class RealType, class Policy>
-      RealType cdf(const complemented2_type<non_central_chi_squared_distribution<RealType, Policy>, RealType>& c)
+      BOOST_MATH_GPU_ENABLED RealType cdf(const complemented2_type<non_central_chi_squared_distribution<RealType, Policy>, RealType>& c)
      { // Complemented Cumulative Distribution Function
-         const char* function = "boost::math::non_central_chi_squared_distribution<%1%>::cdf(%1%)";
+         constexpr auto function = "boost::math::non_central_chi_squared_distribution<%1%>::cdf(%1%)";
         non_central_chi_squared_distribution<RealType, Policy> const& dist = c.dist;
         RealType x = c.param;
         RealType k = dist.degrees_of_freedom();
@@ -971,13 +976,13 @@ namespace boost
      } // ccdf

      template <class RealType, class Policy>
-      inline RealType quantile(const non_central_chi_squared_distribution<RealType, Policy>& dist, const RealType& p)
+      BOOST_MATH_GPU_ENABLED inline RealType quantile(const non_central_chi_squared_distribution<RealType, Policy>& dist, const RealType& p)
      { // Quantile (or Percent Point) function.
         return detail::nccs_quantile(dist, p, false);
      } // quantile

      template <class RealType, class Policy>
-      inline RealType quantile(const complemented2_type<non_central_chi_squared_distribution<RealType, Policy>, RealType>& c)
+      BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<non_central_chi_squared_distribution<RealType, Policy>, RealType>& c)
      { // Quantile (or Percent Point) function.
         return detail::nccs_quantile(c.dist, c.param, true);
      } // quantile complement.
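A minimal sketch of the static finders whose hunks appear above; it is illustrative only (not part of the patch), assumes Boost.Math is on the include path, and the numeric inputs are arbitrary values for which the bracketing root-finder should converge:

// Illustrative use of find_degrees_of_freedom / find_non_centrality.
#include <boost/math/distributions/non_central_chi_squared.hpp>
#include <iostream>

int main()
{
    using dist_t = boost::math::non_central_chi_squared_distribution<double>;

    // Find nu such that cdf(non_central_chi_squared(nu, lambda = 5), 20) == 0.95.
    double nu = dist_t::find_degrees_of_freedom(5.0, 20.0, 0.95);

    // Conversely, fix nu and solve for the non-centrality parameter.
    double lambda = dist_t::find_non_centrality(nu, 20.0, 0.95);

    std::cout << "nu ~= " << nu << ", lambda ~= " << lambda << '\n';
    std::cout << "check: " << cdf(dist_t(nu, lambda), 20.0) << '\n'; // ~0.95
}

Both finders drive tools::bracket_and_solve_root, which is why their max_iter bookkeeping switches to boost::math::uintmax_t in this patch.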
diff --git a/include/boost/math/distributions/non_central_f.hpp b/include/boost/math/distributions/non_central_f.hpp
index e93d03e597..dedd437144 100644
--- a/include/boost/math/distributions/non_central_f.hpp
+++ b/include/boost/math/distributions/non_central_f.hpp
@@ -1,7 +1,7 @@
 // boost\math\distributions\non_central_f.hpp
 
 // Copyright John Maddock 2008.
-
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -10,9 +10,13 @@
 #ifndef BOOST_MATH_SPECIAL_NON_CENTRAL_F_HPP
 #define BOOST_MATH_SPECIAL_NON_CENTRAL_F_HPP
 
+#include
+#include
+#include
 #include
 #include
 #include
+#include
 
 namespace boost
 {
@@ -25,9 +29,9 @@ namespace boost
         typedef RealType value_type;
         typedef Policy policy_type;

-         non_central_f_distribution(RealType v1_, RealType v2_, RealType lambda) : v1(v1_), v2(v2_), ncp(lambda)
+         BOOST_MATH_GPU_ENABLED non_central_f_distribution(RealType v1_, RealType v2_, RealType lambda) : v1(v1_), v2(v2_), ncp(lambda)
         {
-            const char* function = "boost::math::non_central_f_distribution<%1%>::non_central_f_distribution(%1%,%1%)";
+            constexpr auto function = "boost::math::non_central_f_distribution<%1%>::non_central_f_distribution(%1%,%1%)";
            RealType r;
            detail::check_df(
               function,
@@ -42,15 +46,15 @@ namespace boost
               Policy());
         } // non_central_f_distribution constructor.

-         RealType degrees_of_freedom1()const
+         BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom1()const
         {
            return v1;
         }
-         RealType degrees_of_freedom2()const
+         BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom2()const
         {
            return v2;
         }
-         RealType non_centrality() const
+         BOOST_MATH_GPU_ENABLED RealType non_centrality() const
         { // Private data getter function.
            return ncp;
         }
@@ -71,24 +75,24 @@ namespace boost
      // Non-member functions to give properties of the distribution.

      template <class RealType, class Policy>
-      inline const std::pair<RealType, RealType> range(const non_central_f_distribution<RealType, Policy>& /* dist */)
+      BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const non_central_f_distribution<RealType, Policy>& /* dist */)
      { // Range of permissible values for random variable k.
         using boost::math::tools::max_value;
-         return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
+         return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
      }

      template <class RealType, class Policy>
-      inline const std::pair<RealType, RealType> support(const non_central_f_distribution<RealType, Policy>& /* dist */)
+      BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const non_central_f_distribution<RealType, Policy>& /* dist */)
      { // Range of supported values for random variable k.
        // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
         using boost::math::tools::max_value;
-         return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
+         return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
      }

      template <class RealType, class Policy>
-      inline RealType mean(const non_central_f_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType mean(const non_central_f_distribution<RealType, Policy>& dist)
      {
-         const char* function = "mean(non_central_f_distribution<%1%> const&)";
+         constexpr auto function = "mean(non_central_f_distribution<%1%> const&)";
         RealType v1 = dist.degrees_of_freedom1();
         RealType v2 = dist.degrees_of_freedom2();
         RealType l = dist.non_centrality();
@@ -116,9 +120,9 @@ namespace boost
      } // mean

      template <class RealType, class Policy>
-      inline RealType mode(const non_central_f_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType mode(const non_central_f_distribution<RealType, Policy>& dist)
      { // mode.
-         static const char* function = "mode(non_central_chi_squared_distribution<%1%> const&)";
+         constexpr auto function = "mode(non_central_chi_squared_distribution<%1%> const&)";

         RealType n = dist.degrees_of_freedom1();
         RealType m = dist.degrees_of_freedom2();
@@ -146,9 +150,9 @@ namespace boost
      }

      template <class RealType, class Policy>
-      inline RealType variance(const non_central_f_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType variance(const non_central_f_distribution<RealType, Policy>& dist)
      { // variance.
-         const char* function = "variance(non_central_f_distribution<%1%> const&)";
+         constexpr auto function = "variance(non_central_f_distribution<%1%> const&)";
         RealType n = dist.degrees_of_freedom1();
         RealType m = dist.degrees_of_freedom2();
         RealType l = dist.non_centrality();
@@ -182,9 +186,9 @@ namespace boost
      // standard_deviation provided by derived accessors.

      template <class RealType, class Policy>
-      inline RealType skewness(const non_central_f_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType skewness(const non_central_f_distribution<RealType, Policy>& dist)
      { // skewness = sqrt(l).
-         const char* function = "skewness(non_central_f_distribution<%1%> const&)";
+         constexpr auto function = "skewness(non_central_f_distribution<%1%> const&)";
         BOOST_MATH_STD_USING
         RealType n = dist.degrees_of_freedom1();
         RealType m = dist.degrees_of_freedom2();
@@ -219,9 +223,9 @@ namespace boost
      }

      template <class RealType, class Policy>
-      inline RealType kurtosis_excess(const non_central_f_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const non_central_f_distribution<RealType, Policy>& dist)
      {
-         const char* function = "kurtosis_excess(non_central_f_distribution<%1%> const&)";
+         constexpr auto function = "kurtosis_excess(non_central_f_distribution<%1%> const&)";
         BOOST_MATH_STD_USING
         RealType n = dist.degrees_of_freedom1();
         RealType m = dist.degrees_of_freedom2();
@@ -266,13 +270,13 @@ namespace boost
      } // kurtosis_excess

      template <class RealType, class Policy>
-      inline RealType kurtosis(const non_central_f_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const non_central_f_distribution<RealType, Policy>& dist)
      {
         return kurtosis_excess(dist) + 3;
      }

      template <class RealType, class Policy>
-      inline RealType pdf(const non_central_f_distribution<RealType, Policy>& dist, const RealType& x)
+      BOOST_MATH_GPU_ENABLED inline RealType pdf(const non_central_f_distribution<RealType, Policy>& dist, const RealType& x)
      { // Probability Density/Mass Function.
         typedef typename policies::evaluation<RealType, Policy>::type value_type;
         typedef typename policies::normalise<
@@ -292,9 +296,9 @@ namespace boost
      } // pdf

      template <class RealType, class Policy>
-      RealType cdf(const non_central_f_distribution<RealType, Policy>& dist, const RealType& x)
+      BOOST_MATH_GPU_ENABLED RealType cdf(const non_central_f_distribution<RealType, Policy>& dist, const RealType& x)
      {
-         const char* function = "cdf(const non_central_f_distribution<%1%>&, %1%)";
+         constexpr auto function = "cdf(const non_central_f_distribution<%1%>&, %1%)";
         RealType r;
         if(!detail::check_df(
            function,
@@ -333,9 +337,9 @@ namespace boost
      } // cdf

      template <class RealType, class Policy>
-      RealType cdf(const complemented2_type<non_central_f_distribution<RealType, Policy>, RealType>& c)
+      BOOST_MATH_GPU_ENABLED RealType cdf(const complemented2_type<non_central_f_distribution<RealType, Policy>, RealType>& c)
      { // Complemented Cumulative Distribution Function
-         const char* function = "cdf(complement(const non_central_f_distribution<%1%>&, %1%))";
+         constexpr auto function = "cdf(complement(const non_central_f_distribution<%1%>&, %1%))";
         RealType r;
         if(!detail::check_df(
            function,
@@ -374,7 +378,7 @@ namespace boost
      } // ccdf

      template <class RealType, class Policy>
-      inline RealType quantile(const non_central_f_distribution<RealType, Policy>& dist, const RealType& p)
+      BOOST_MATH_GPU_ENABLED inline RealType quantile(const non_central_f_distribution<RealType, Policy>& dist, const RealType& p)
      { // Quantile (or Percent Point) function.
         RealType alpha = dist.degrees_of_freedom1() / 2;
         RealType beta = dist.degrees_of_freedom2() / 2;
@@ -388,7 +392,7 @@ namespace boost
      } // quantile

      template <class RealType, class Policy>
-      inline RealType quantile(const complemented2_type<non_central_f_distribution<RealType, Policy>, RealType>& c)
+      BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<non_central_f_distribution<RealType, Policy>, RealType>& c)
      { // Quantile (or Percent Point) function.
         RealType alpha = c.dist.degrees_of_freedom1() / 2;
         RealType beta = c.dist.degrees_of_freedom2() / 2;
diff --git a/include/boost/math/distributions/normal.hpp b/include/boost/math/distributions/normal.hpp
index 70259e62b1..9d973fb539 100644
--- a/include/boost/math/distributions/normal.hpp
+++ b/include/boost/math/distributions/normal.hpp
@@ -1,6 +1,6 @@
 // Copyright John Maddock 2006, 2007.
 // Copyright Paul A. Bristow 2006, 2007.
-
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -15,13 +15,15 @@
 // From MathWorld--A Wolfram Web Resource.
 // http://mathworld.wolfram.com/NormalDistribution.html
 
+#include
+#include
+#include
 #include
 #include // for erf/erfc.
 #include
 #include
-
-#include
-#include
+#include
+#include
 
 namespace boost{ namespace math{
@@ -32,32 +34,32 @@ class normal_distribution
    using value_type = RealType;
    using policy_type = Policy;

-   explicit normal_distribution(RealType l_mean = 0, RealType sd = 1)
+   BOOST_MATH_GPU_ENABLED explicit normal_distribution(RealType l_mean = 0, RealType sd = 1)
      : m_mean(l_mean), m_sd(sd)
    { // Default is a 'standard' normal distribution N01.
-     static const char* function = "boost::math::normal_distribution<%1%>::normal_distribution";
+     constexpr auto function = "boost::math::normal_distribution<%1%>::normal_distribution";
     RealType result;
     detail::check_scale(function, sd, &result, Policy());
     detail::check_location(function, l_mean, &result, Policy());
   }

-   RealType mean()const
+   BOOST_MATH_GPU_ENABLED RealType mean()const
   { // alias for location.
     return m_mean;
   }

-   RealType standard_deviation()const
+   BOOST_MATH_GPU_ENABLED RealType standard_deviation()const
   { // alias for scale.
     return m_sd;
   }

   // Synonyms, provided to allow generic use of find_location and find_scale.
-   RealType location()const
+   BOOST_MATH_GPU_ENABLED RealType location()const
   { // location.
     return m_mean;
   }
-   RealType scale()const
+   BOOST_MATH_GPU_ENABLED RealType scale()const
   { // scale.
     return m_sd;
   }
@@ -92,30 +94,30 @@ normal_distribution(RealType)->normal_distribution<typename boost::math::tools::promote_args<RealType>::type>;

template <class RealType, class Policy>
-inline std::pair<RealType, RealType> range(const normal_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> range(const normal_distribution<RealType, Policy>& /*dist*/)
{ // Range of permissible values for random variable x.
-  if (std::numeric_limits<RealType>::has_infinity)
+  BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
  {
-     return std::pair<RealType, RealType>(-std::numeric_limits<RealType>::infinity(), std::numeric_limits<RealType>::infinity()); // - to + infinity.
+     return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
  }
  else
  { // Can only use max_value.
    using boost::math::tools::max_value;
-    return std::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max value.
+    return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max value.
  }
}

template <class RealType, class Policy>
-inline std::pair<RealType, RealType> support(const normal_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> support(const normal_distribution<RealType, Policy>& /*dist*/)
{ // This is range values for random variable x where cdf rises from 0 to 1, and outside it, the pdf is zero.
-  if (std::numeric_limits<RealType>::has_infinity)
+  BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
  {
-     return std::pair<RealType, RealType>(-std::numeric_limits<RealType>::infinity(), std::numeric_limits<RealType>::infinity()); // - to + infinity.
+     return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
  }
  else
  { // Can only use max_value.
    using boost::math::tools::max_value;
-    return std::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max value.
+    return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max value.
  }
}
@@ -124,14 +126,14 @@ inline std::pair<RealType, RealType> support(const normal_distribution<RealType, Policy>& /*dist*/)

template <class RealType, class Policy>
-inline RealType pdf(const normal_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const normal_distribution<RealType, Policy>& dist, const RealType& x)
{
   BOOST_MATH_STD_USING  // for ADL of std functions

   RealType sd = dist.standard_deviation();
   RealType mean = dist.mean();

-   static const char* function = "boost::math::pdf(const normal_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::pdf(const normal_distribution<%1%>&, %1%)";

   RealType result = 0;
   if(false == detail::check_scale(function, sd, &result, Policy()))
@@ -162,16 +164,16 @@ inline RealType pdf(const normal_distribution<RealType, Policy>& dist, const Rea
} // pdf

template <class RealType, class Policy>
-inline RealType logpdf(const normal_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType logpdf(const normal_distribution<RealType, Policy>& dist, const RealType& x)
{
   BOOST_MATH_STD_USING  // for ADL of std functions

   const RealType sd = dist.standard_deviation();
   const RealType mean = dist.mean();

-   static const char* function = "boost::math::logpdf(const normal_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::logpdf(const normal_distribution<%1%>&, %1%)";

-   RealType result = -std::numeric_limits<RealType>::infinity();
+   RealType result = -boost::math::numeric_limits<RealType>::infinity();
   if(false == detail::check_scale(function, sd, &result, Policy()))
   {
      return result;
@@ -198,13 +200,13 @@ inline RealType logpdf(const normal_distribution<RealType, Policy>& dist, const
}

template <class RealType, class Policy>
-inline RealType cdf(const normal_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const normal_distribution<RealType, Policy>& dist, const RealType& x)
{
   BOOST_MATH_STD_USING  // for ADL of std functions

   RealType sd = dist.standard_deviation();
   RealType mean = dist.mean();
-   static const char* function = "boost::math::cdf(const normal_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(const normal_distribution<%1%>&, %1%)";
   RealType result = 0;
   if(false == detail::check_scale(function, sd, &result, Policy()))
   {
@@ -229,13 +231,13 @@ inline RealType cdf(const normal_distribution<RealType, Policy>& dist, const Rea
} // cdf

template <class RealType, class Policy>
-inline RealType quantile(const normal_distribution<RealType, Policy>& dist, const RealType& p)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const normal_distribution<RealType, Policy>& dist, const RealType& p)
{
   BOOST_MATH_STD_USING  // for ADL of std functions

   RealType sd = dist.standard_deviation();
   RealType mean = dist.mean();

-   static const char* function = "boost::math::quantile(const normal_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const normal_distribution<%1%>&, %1%)";

   RealType result = 0;
   if(false == detail::check_scale(function, sd, &result, Policy()))
@@ -253,14 +255,14 @@ inline RealType quantile(const normal_distribution<RealType, Policy>& dist, cons
} // quantile

template <class RealType, class Policy>
-inline RealType cdf(const complemented2_type<normal_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<normal_distribution<RealType, Policy>, RealType>& c)
{
   BOOST_MATH_STD_USING  // for ADL of std functions

   RealType sd = c.dist.standard_deviation();
   RealType mean = c.dist.mean();
   RealType x = c.param;
-   static const char* function = "boost::math::cdf(const complement(normal_distribution<%1%>&), %1%)";
+   constexpr auto function = "boost::math::cdf(const complement(normal_distribution<%1%>&), %1%)";

   RealType result = 0;
   if(false == detail::check_scale(function, sd, &result, Policy()))
@@ -281,13 +283,13 @@ inline RealType cdf(const complemented2_type<normal_distribution<RealType, Policy>, RealType>& c)

template <class RealType, class Policy>
-inline RealType quantile(const complemented2_type<normal_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<normal_distribution<RealType, Policy>, RealType>& c)
{
   BOOST_MATH_STD_USING  // for ADL of std functions

   RealType sd = c.dist.standard_deviation();
   RealType mean = c.dist.mean();
-   static const char* function = "boost::math::quantile(const complement(normal_distribution<%1%>&), %1%)";
+   constexpr auto function = "boost::math::quantile(const complement(normal_distribution<%1%>&), %1%)";
   RealType result = 0;
   if(false == detail::check_scale(function, sd, &result, Policy()))
      return result;
@@ -303,51 +305,51 @@ inline RealType quantile(const complemented2_type<normal_distribution<RealType, Policy>, RealType>& c)

template <class RealType, class Policy>
-inline RealType mean(const normal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mean(const normal_distribution<RealType, Policy>& dist)
{
   return dist.mean();
}

template <class RealType, class Policy>
-inline RealType standard_deviation(const normal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType standard_deviation(const normal_distribution<RealType, Policy>& dist)
{
   return dist.standard_deviation();
}

template <class RealType, class Policy>
-inline RealType mode(const normal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mode(const normal_distribution<RealType, Policy>& dist)
{
   return dist.mean();
}

template <class RealType, class Policy>
-inline RealType median(const normal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType median(const normal_distribution<RealType, Policy>& dist)
{
   return dist.mean();
}

template <class RealType, class Policy>
-inline RealType skewness(const normal_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const normal_distribution<RealType, Policy>& /*dist*/)
{
   return 0;
}

template <class RealType, class Policy>
-inline RealType kurtosis(const normal_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const normal_distribution<RealType, Policy>& /*dist*/)
{
   return 3;
}

template <class RealType, class Policy>
-inline RealType kurtosis_excess(const normal_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const normal_distribution<RealType, Policy>& /*dist*/)
{
   return 0;
}

template <class RealType, class Policy>
-inline RealType entropy(const normal_distribution<RealType, Policy> & dist)
+BOOST_MATH_GPU_ENABLED inline RealType entropy(const normal_distribution<RealType, Policy> & dist)
{
-   using std::log;
+   BOOST_MATH_STD_USING
   RealType arg = constants::two_pi<RealType>()*constants::e<RealType>()*dist.standard_deviation()*dist.standard_deviation();
   return log(arg)/2;
}
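A quick illustrative check of the entropy overload touched in the last hunk (not part of the patch; Boost.Math assumed on the include path). For a normal distribution the differential entropy is log(2*pi*e*sigma^2)/2, which is exactly what the function computes:

// Illustrative check of entropy(normal_distribution).
#include <boost/math/distributions/normal.hpp>
#include <cmath>
#include <iostream>

int main()
{
    boost::math::normal_distribution<double> n(0.0, 2.0); // mean 0, sd 2

    const double sigma = n.standard_deviation();
    const double expected =
        std::log(2.0 * 3.141592653589793 * std::exp(1.0) * sigma * sigma) / 2.0;

    std::cout << "entropy  = " << entropy(n) << '\n';
    std::cout << "expected = " << expected << '\n'; // should agree
}

The switch from `using std::log;` to BOOST_MATH_STD_USING is what lets the same body resolve to the device `log` when compiled for CUDA/SYCL.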
template - inline bool check_pareto_scale( + BOOST_MATH_GPU_ENABLED inline bool check_pareto_scale( const char* function, RealType scale, RealType* result, const Policy& pol) @@ -63,7 +62,7 @@ namespace boost } // bool check_pareto_scale template - inline bool check_pareto_shape( + BOOST_MATH_GPU_ENABLED inline bool check_pareto_shape( const char* function, RealType shape, RealType* result, const Policy& pol) @@ -92,7 +91,7 @@ namespace boost } // bool check_pareto_shape( template - inline bool check_pareto_x( + BOOST_MATH_GPU_ENABLED inline bool check_pareto_x( const char* function, RealType const& x, RealType* result, const Policy& pol) @@ -121,7 +120,7 @@ namespace boost } // bool check_pareto_x template - inline bool check_pareto( // distribution parameters. + BOOST_MATH_GPU_ENABLED inline bool check_pareto( // distribution parameters. const char* function, RealType scale, RealType shape, @@ -140,19 +139,19 @@ namespace boost typedef RealType value_type; typedef Policy policy_type; - pareto_distribution(RealType l_scale = 1, RealType l_shape = 1) + BOOST_MATH_GPU_ENABLED pareto_distribution(RealType l_scale = 1, RealType l_shape = 1) : m_scale(l_scale), m_shape(l_shape) { // Constructor. RealType result = 0; detail::check_pareto("boost::math::pareto_distribution<%1%>::pareto_distribution", l_scale, l_shape, &result, Policy()); } - RealType scale()const + BOOST_MATH_GPU_ENABLED RealType scale()const { // AKA Xm and Wolfram b and beta return m_scale; } - RealType shape()const + BOOST_MATH_GPU_ENABLED RealType shape()const { // AKA k and Wolfram a and alpha return m_shape; } @@ -173,25 +172,25 @@ namespace boost template - inline const std::pair range(const pareto_distribution& /*dist*/) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair range(const pareto_distribution& /*dist*/) { // Range of permissible values for random variable x. using boost::math::tools::max_value; - return std::pair(static_cast(0), max_value()); // scale zero to + infinity. + return boost::math::pair(static_cast(0), max_value()); // scale zero to + infinity. } // range template - inline const std::pair support(const pareto_distribution& dist) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair support(const pareto_distribution& dist) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. using boost::math::tools::max_value; - return std::pair(dist.scale(), max_value() ); // scale to + infinity. + return boost::math::pair(dist.scale(), max_value() ); // scale to + infinity. } // support template - inline RealType pdf(const pareto_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType pdf(const pareto_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std function pow. - static const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; RealType scale = dist.scale(); RealType shape = dist.shape(); RealType result = 0; @@ -207,10 +206,10 @@ namespace boost } // pdf template - inline RealType cdf(const pareto_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const pareto_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std function pow. 
- static const char* function = "boost::math::cdf(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const pareto_distribution<%1%>&, %1%)"; RealType scale = dist.scale(); RealType shape = dist.shape(); RealType result = 0; @@ -230,10 +229,10 @@ namespace boost } // cdf template - inline RealType logcdf(const pareto_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType logcdf(const pareto_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std function pow. - static const char* function = "boost::math::logcdf(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::logcdf(const pareto_distribution<%1%>&, %1%)"; RealType scale = dist.scale(); RealType shape = dist.shape(); RealType result = 0; @@ -244,7 +243,7 @@ namespace boost if (x <= scale) { // regardless of shape, cdf is zero. - return -std::numeric_limits::infinity(); + return -boost::math::numeric_limits::infinity(); } result = log1p(-pow(scale/x, shape), Policy()); @@ -252,10 +251,10 @@ namespace boost } // logcdf template - inline RealType quantile(const pareto_distribution& dist, const RealType& p) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const pareto_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std function pow. - static const char* function = "boost::math::quantile(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const pareto_distribution<%1%>&, %1%)"; RealType result = 0; RealType scale = dist.scale(); RealType shape = dist.shape(); @@ -279,10 +278,10 @@ namespace boost } // quantile template - inline RealType cdf(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std function pow. - static const char* function = "boost::math::cdf(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const pareto_distribution<%1%>&, %1%)"; RealType result = 0; RealType x = c.param; RealType scale = c.dist.scale(); @@ -301,10 +300,10 @@ namespace boost } // cdf complement template - inline RealType logcdf(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType logcdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std function pow. - static const char* function = "boost::math::logcdf(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::logcdf(const pareto_distribution<%1%>&, %1%)"; RealType result = 0; RealType x = c.param; RealType scale = c.dist.scale(); @@ -323,10 +322,10 @@ namespace boost } // logcdf complement template - inline RealType quantile(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std function pow. 
- static const char* function = "boost::math::quantile(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const pareto_distribution<%1%>&, %1%)"; RealType result = 0; RealType q = c.param; RealType scale = c.dist.scale(); @@ -350,10 +349,10 @@ namespace boost } // quantile complement template - inline RealType mean(const pareto_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mean(const pareto_distribution& dist) { RealType result = 0; - static const char* function = "boost::math::mean(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::mean(const pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, dist.scale(), dist.shape(), &result, Policy())) { return result; @@ -370,16 +369,16 @@ namespace boost } // mean template - inline RealType mode(const pareto_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mode(const pareto_distribution& dist) { return dist.scale(); } // mode template - inline RealType median(const pareto_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType median(const pareto_distribution& dist) { RealType result = 0; - static const char* function = "boost::math::median(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::median(const pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, dist.scale(), dist.shape(), &result, Policy())) { return result; @@ -389,12 +388,12 @@ namespace boost } // median template - inline RealType variance(const pareto_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType variance(const pareto_distribution& dist) { RealType result = 0; RealType scale = dist.scale(); RealType shape = dist.shape(); - static const char* function = "boost::math::variance(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::variance(const pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, scale, shape, &result, Policy())) { return result; @@ -414,12 +413,12 @@ namespace boost } // variance template - inline RealType skewness(const pareto_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType skewness(const pareto_distribution& dist) { BOOST_MATH_STD_USING RealType result = 0; RealType shape = dist.shape(); - static const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, dist.scale(), shape, &result, Policy())) { return result; @@ -440,11 +439,11 @@ namespace boost } // skewness template - inline RealType kurtosis(const pareto_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const pareto_distribution& dist) { RealType result = 0; RealType shape = dist.shape(); - static const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, dist.scale(), shape, &result, Policy())) { return result; @@ -464,11 +463,11 @@ namespace boost } // kurtosis template - inline RealType kurtosis_excess(const pareto_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const pareto_distribution& dist) { RealType result = 0; RealType shape = dist.shape(); - static const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::pdf(const 
pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, dist.scale(), shape, &result, Policy())) { return result; @@ -488,9 +487,9 @@ namespace boost } // kurtosis_excess template - inline RealType entropy(const pareto_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType entropy(const pareto_distribution& dist) { - using std::log; + BOOST_MATH_STD_USING RealType xm = dist.scale(); RealType alpha = dist.shape(); return log(xm/alpha) + 1 + 1/alpha; diff --git a/include/boost/math/distributions/poisson.hpp b/include/boost/math/distributions/poisson.hpp index 570a590259..c2fad66be0 100644 --- a/include/boost/math/distributions/poisson.hpp +++ b/include/boost/math/distributions/poisson.hpp @@ -2,6 +2,7 @@ // Copyright John Maddock 2006. // Copyright Paul A. Bristow 2007. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. @@ -36,6 +37,10 @@ #ifndef BOOST_MATH_SPECIAL_POISSON_HPP #define BOOST_MATH_SPECIAL_POISSON_HPP +#include +#include +#include +#include #include #include // for incomplete gamma. gamma_q #include // for incomplete gamma. gamma_q @@ -46,9 +51,6 @@ #include // for root finding. #include -#include -#include - namespace boost { namespace math @@ -60,7 +62,7 @@ namespace boost // checks are always performed, even if exceptions are not enabled. template - inline bool check_mean(const char* function, const RealType& mean, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_mean(const char* function, const RealType& mean, RealType* result, const Policy& pol) { if(!(boost::math::isfinite)(mean) || (mean < 0)) { @@ -73,7 +75,7 @@ namespace boost } // bool check_mean template - inline bool check_mean_NZ(const char* function, const RealType& mean, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_mean_NZ(const char* function, const RealType& mean, RealType* result, const Policy& pol) { // mean == 0 is considered an error. if( !(boost::math::isfinite)(mean) || (mean <= 0)) { @@ -86,13 +88,13 @@ namespace boost } // bool check_mean_NZ template - inline bool check_dist(const char* function, const RealType& mean, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* function, const RealType& mean, RealType* result, const Policy& pol) { // Only one check, so this is redundant really but should be optimized away. 
         return check_mean_NZ(function, mean, result, pol);
       } // bool check_dist

       template <class RealType, class Policy>
-      inline bool check_k(const char* function, const RealType& k, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_k(const char* function, const RealType& k, RealType* result, const Policy& pol)
       {
         if((k < 0) || !(boost::math::isfinite)(k))
         {
@@ -105,7 +107,7 @@ namespace boost
       } // bool check_k

       template <class RealType, class Policy>
-      inline bool check_dist_and_k(const char* function, RealType mean, RealType k, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_dist_and_k(const char* function, RealType mean, RealType k, RealType* result, const Policy& pol)
       {
         if((check_dist(function, mean, result, pol) == false) ||
           (check_k(function, k, result, pol) == false))
@@ -116,7 +118,7 @@ namespace boost
       } // bool check_dist_and_k

       template <class RealType, class Policy>
-      inline bool check_prob(const char* function, const RealType& p, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_prob(const char* function, const RealType& p, RealType* result, const Policy& pol)
       { // Check 0 <= p <= 1
         if(!(boost::math::isfinite)(p) || (p < 0) || (p > 1))
         {
@@ -129,7 +131,7 @@ namespace boost
       } // bool check_prob

       template <class RealType, class Policy>
-      inline bool check_dist_and_prob(const char* function, RealType mean, RealType p, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_dist_and_prob(const char* function, RealType mean, RealType p, RealType* result, const Policy& pol)
       {
         if((check_dist(function, mean, result, pol) == false) ||
           (check_prob(function, p, result, pol) == false))
@@ -148,7 +150,7 @@
       using value_type = RealType;
       using policy_type = Policy;

-      explicit poisson_distribution(RealType l_mean = 1) : m_l(l_mean) // mean (lambda).
+      BOOST_MATH_GPU_ENABLED explicit poisson_distribution(RealType l_mean = 1) : m_l(l_mean) // mean (lambda).
      { // Expected mean number of events that occur during the given interval.
        RealType r;
        poisson_detail::check_dist(
@@ -157,7 +159,7 @@ namespace boost
          &r, Policy());
      } // poisson_distribution constructor.

-      RealType mean() const
+      BOOST_MATH_GPU_ENABLED RealType mean() const
      { // Private data getter function.
        return m_l;
      }
@@ -176,28 +178,28 @@ namespace boost
  // Non-member functions to give properties of the distribution.

  template <class RealType, class Policy>
-  inline std::pair<RealType, RealType> range(const poisson_distribution<RealType, Policy>& /* dist */)
+  BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> range(const poisson_distribution<RealType, Policy>& /* dist */)
  { // Range of permissible values for random variable k.
    using boost::math::tools::max_value;
-    return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // Max integer?
+    return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // Max integer?
  }

  template <class RealType, class Policy>
-  inline std::pair<RealType, RealType> support(const poisson_distribution<RealType, Policy>& /* dist */)
+  BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> support(const poisson_distribution<RealType, Policy>& /* dist */)
  { // Range of supported values for random variable k.
    // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
    using boost::math::tools::max_value;
-    return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
+    return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
  }

  template <class RealType, class Policy>
-  inline RealType mean(const poisson_distribution<RealType, Policy>& dist)
+  BOOST_MATH_GPU_ENABLED inline RealType mean(const poisson_distribution<RealType, Policy>& dist)
  { // Mean of poisson distribution = lambda.
    return dist.mean();
  } // mean

  template <class RealType, class Policy>
-  inline RealType mode(const poisson_distribution<RealType, Policy>& dist)
+  BOOST_MATH_GPU_ENABLED inline RealType mode(const poisson_distribution<RealType, Policy>& dist)
  { // mode.
    BOOST_MATH_STD_USING // ADL of std functions.
    return floor(dist.mean());
@@ -206,7 +208,7 @@ namespace boost
  // Median now implemented via quantile(half) in derived accessors.

  template <class RealType, class Policy>
-  inline RealType variance(const poisson_distribution<RealType, Policy>& dist)
+  BOOST_MATH_GPU_ENABLED inline RealType variance(const poisson_distribution<RealType, Policy>& dist)
  { // variance.
    return dist.mean();
  }
@@ -214,14 +216,14 @@ namespace boost
  // standard_deviation provided by derived accessors.

  template <class RealType, class Policy>
-  inline RealType skewness(const poisson_distribution<RealType, Policy>& dist)
+  BOOST_MATH_GPU_ENABLED inline RealType skewness(const poisson_distribution<RealType, Policy>& dist)
  { // skewness = sqrt(l).
    BOOST_MATH_STD_USING // ADL of std functions.
    return 1 / sqrt(dist.mean());
  }

  template <class RealType, class Policy>
-  inline RealType kurtosis_excess(const poisson_distribution<RealType, Policy>& dist)
+  BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const poisson_distribution<RealType, Policy>& dist)
  { // skewness = sqrt(l).
    return 1 / dist.mean(); // kurtosis_excess 1/mean from Wiki & MathWorld eq 31.
    // http://mathworld.wolfram.com/Kurtosis.html explains that the kurtosis excess
@@ -230,7 +232,7 @@ namespace boost
  } // RealType kurtosis_excess

  template <class RealType, class Policy>
-  inline RealType kurtosis(const poisson_distribution<RealType, Policy>& dist)
+  BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const poisson_distribution<RealType, Policy>& dist)
  { // kurtosis is 4th moment about the mean = u4 / sd ^ 4
    // http://en.wikipedia.org/wiki/Kurtosis
    // kurtosis can range from -2 (flat top) to +infinity (sharp peak & heavy tails).
@@ -242,7 +244,7 @@ namespace boost
  } // RealType kurtosis

  template <class RealType, class Policy>
-  RealType pdf(const poisson_distribution<RealType, Policy>& dist, const RealType& k)
+  BOOST_MATH_GPU_ENABLED RealType pdf(const poisson_distribution<RealType, Policy>& dist, const RealType& k)
  { // Probability Density/Mass Function.
    // Probability that there are EXACTLY k occurrences (or arrivals).
    BOOST_FPU_EXCEPTION_GUARD
@@ -274,7 +276,7 @@ namespace boost
  } // pdf

  template <class RealType, class Policy>
-  RealType logpdf(const poisson_distribution<RealType, Policy>& dist, const RealType& k)
+  BOOST_MATH_GPU_ENABLED RealType logpdf(const poisson_distribution<RealType, Policy>& dist, const RealType& k)
  {
    BOOST_FPU_EXCEPTION_GUARD
@@ -283,7 +285,7 @@ namespace boost
    RealType mean = dist.mean();
    // Error check:
-    RealType result = -std::numeric_limits<RealType>::infinity();
+    RealType result = -boost::math::numeric_limits<RealType>::infinity();
    if(false == poisson_detail::check_dist_and_k(
      "boost::math::pdf(const poisson_distribution<%1%>&, %1%)",
      mean,
@@ -296,7 +298,7 @@ namespace boost
    // Special case of mean zero, regardless of the number of events k.
    if (mean == 0)
    { // Probability for any k is zero.
-      return std::numeric_limits<RealType>::quiet_NaN();
+      return boost::math::numeric_limits<RealType>::quiet_NaN();
    }

    // Special case where k and lambda are both positive
@@ -310,7 +312,7 @@ namespace boost
  }

  template <class RealType, class Policy>
-  RealType cdf(const poisson_distribution<RealType, Policy>& dist, const RealType& k)
+  BOOST_MATH_GPU_ENABLED RealType cdf(const poisson_distribution<RealType, Policy>& dist, const RealType& k)
  { // Cumulative Distribution Function Poisson.
    // The random variate k is the number of occurrences(or arrivals)
    // k argument may be integral, signed, or unsigned, or floating point.
@@ -361,7 +363,7 @@ namespace boost
  } // binomial cdf

  template <class RealType, class Policy>
-  RealType cdf(const complemented2_type<poisson_distribution<RealType, Policy>, RealType>& c)
+  BOOST_MATH_GPU_ENABLED RealType cdf(const complemented2_type<poisson_distribution<RealType, Policy>, RealType>& c)
  { // Complemented Cumulative Distribution Function Poisson
    // The random variate k is the number of events, occurrences or arrivals.
    // k argument may be integral, signed, or unsigned, or floating point.
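// The complemented form computes P(X > k) directly rather than 1 - cdf(k),
// preserving accuracy in the upper tail: since cdf(k) = gamma_q(k+1, mean),
// the complement is gamma_p(k+1, mean). A sketch using only the public API
// (illustrative, not part of this patch):
//
//   boost::math::poisson_distribution<double> d(4.0);
//   double q = cdf(complement(d, 2.0)); // P(X > 2) = gamma_p(3, 4.0)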
@@ -411,10 +413,10 @@ namespace boost
  } // poisson ccdf

  template <class RealType, class Policy>
-  inline RealType quantile(const poisson_distribution<RealType, Policy>& dist, const RealType& p)
+  BOOST_MATH_GPU_ENABLED inline RealType quantile(const poisson_distribution<RealType, Policy>& dist, const RealType& p)
  { // Quantile (or Percent Point) Poisson function.
    // Return the number of expected events k for a given probability p.
-    static const char* function = "boost::math::quantile(const poisson_distribution<%1%>&, %1%)";
+    constexpr auto function = "boost::math::quantile(const poisson_distribution<%1%>&, %1%)";
    RealType result = 0; // of Argument checks:
    if(false == poisson_detail::check_prob(
      function,
@@ -443,7 +445,7 @@ namespace boost
      return policies::raise_overflow_error<RealType>(function, 0, Policy());
    }
    using discrete_type = typename Policy::discrete_quantile_type;
-    std::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
+    boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
    RealType guess;
    RealType factor = 8;
    RealType z = dist.mean();
@@ -477,13 +479,13 @@ namespace boost
  } // quantile

  template <class RealType, class Policy>
-  inline RealType quantile(const complemented2_type<poisson_distribution<RealType, Policy>, RealType>& c)
+  BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<poisson_distribution<RealType, Policy>, RealType>& c)
  { // Quantile (or Percent Point) of Poisson function.
    // Return the number of expected events k for a given
    // complement of the probability q.
    //
    // Error checks:
-    static const char* function = "boost::math::quantile(complement(const poisson_distribution<%1%>&, %1%))";
+    constexpr auto function = "boost::math::quantile(complement(const poisson_distribution<%1%>&, %1%))";
    RealType q = c.param;
    const poisson_distribution<RealType, Policy>& dist = c.dist;
    RealType result = 0;  // of argument checks.
@@ -514,7 +516,7 @@ namespace boost
      return 0; // Exact result regardless of discrete-quantile Policy
    }
    using discrete_type = typename Policy::discrete_quantile_type;
-    std::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
+    boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
    RealType guess;
    RealType factor = 8;
    RealType z = dist.mean();
diff --git a/include/boost/math/distributions/rayleigh.hpp b/include/boost/math/distributions/rayleigh.hpp
index 4e741313c8..155525b539 100644
--- a/include/boost/math/distributions/rayleigh.hpp
+++ b/include/boost/math/distributions/rayleigh.hpp
@@ -7,6 +7,10 @@
 #ifndef BOOST_STATS_rayleigh_HPP
 #define BOOST_STATS_rayleigh_HPP

+#include
+#include
+#include
+#include
 #include
 #include
 #include
@@ -19,16 +23,12 @@
 #  pragma warning(disable: 4702) // unreachable code (return after domain_error throw).
 #endif

-#include
-#include
-#include
-
 namespace boost{ namespace math{

 namespace detail
 { // Error checks:
   template <class RealType, class Policy>
-  inline bool verify_sigma(const char* function, RealType sigma, RealType* presult, const Policy& pol)
+  BOOST_MATH_GPU_ENABLED inline bool verify_sigma(const char* function, RealType sigma, RealType* presult, const Policy& pol)
   {
      if((sigma <= 0) || (!(boost::math::isfinite)(sigma)))
      {
@@ -41,7 +41,7 @@ namespace detail
   } // bool verify_sigma

   template <class RealType, class Policy>
-  inline bool verify_rayleigh_x(const char* function, RealType x, RealType* presult, const Policy& pol)
+  BOOST_MATH_GPU_ENABLED inline bool verify_rayleigh_x(const char* function, RealType x, RealType* presult, const Policy& pol)
   {
      if((x < 0) || (boost::math::isnan)(x))
      {
@@ -61,14 +61,14 @@ class rayleigh_distribution
   using value_type = RealType;
   using policy_type = Policy;

-   explicit rayleigh_distribution(RealType l_sigma = 1)
+   BOOST_MATH_GPU_ENABLED explicit rayleigh_distribution(RealType l_sigma = 1)
      : m_sigma(l_sigma)
   {
      RealType err;
      detail::verify_sigma("boost::math::rayleigh_distribution<%1%>::rayleigh_distribution", l_sigma, &err, Policy());
   } // rayleigh_distribution

-   RealType sigma()const
+   BOOST_MATH_GPU_ENABLED RealType sigma()const
   { // Accessor.
      return m_sigma;
   }
@@ -85,28 +85,28 @@ rayleigh_distribution(RealType)->rayleigh_distribution

 template <class RealType, class Policy>
-inline std::pair<RealType, RealType> range(const rayleigh_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> range(const rayleigh_distribution<RealType, Policy>& /*dist*/)
 { // Range of permissible values for random variable x.
    using boost::math::tools::max_value;
-   return std::pair<RealType, RealType>(static_cast<RealType>(0), std::numeric_limits<RealType>::has_infinity ? std::numeric_limits<RealType>::infinity() : max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), boost::math::numeric_limits<RealType>::has_infinity ? boost::math::numeric_limits<RealType>::infinity() : max_value<RealType>());
 }

 template <class RealType, class Policy>
-inline std::pair<RealType, RealType> support(const rayleigh_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> support(const rayleigh_distribution<RealType, Policy>& /*dist*/)
 { // Range of supported values for random variable x.
   // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
    using boost::math::tools::max_value;
-   return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
 }

 template <class RealType, class Policy>
-inline RealType pdf(const rayleigh_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const rayleigh_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std function exp.

    RealType sigma = dist.sigma();
    RealType result = 0;
-   static const char* function = "boost::math::pdf(const rayleigh_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::pdf(const rayleigh_distribution<%1%>&, %1%)";
    if(false == detail::verify_sigma(function, sigma, &result, Policy()))
    {
       return result;
@@ -125,13 +125,13 @@ inline RealType pdf(const rayleigh_distribution& dist, const R
 } // pdf

 template <class RealType, class Policy>
-inline RealType logpdf(const rayleigh_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType logpdf(const rayleigh_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std function exp.
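// Mathematically logpdf(x) = log(x) - 2*log(sigma) - x*x / (2*sigma*sigma);
// evaluating it directly (rather than as log(pdf(x))) keeps a finite result
// for large x once exp(-x*x/(2*sigma*sigma)) would flush to zero.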
    const RealType sigma = dist.sigma();
-   RealType result = -std::numeric_limits<RealType>::infinity();
-   static const char* function = "boost::math::logpdf(const rayleigh_distribution<%1%>&, %1%)";
+   RealType result = -boost::math::numeric_limits<RealType>::infinity();
+   constexpr auto function = "boost::math::logpdf(const rayleigh_distribution<%1%>&, %1%)";

    if(false == detail::verify_sigma(function, sigma, &result, Policy()))
    {
@@ -151,13 +151,13 @@ inline RealType logpdf(const rayleigh_distribution& dist, cons
 } // logpdf

 template <class RealType, class Policy>
-inline RealType cdf(const rayleigh_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const rayleigh_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

    RealType result = 0;
    RealType sigma = dist.sigma();
-   static const char* function = "boost::math::cdf(const rayleigh_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(const rayleigh_distribution<%1%>&, %1%)";
    if(false == detail::verify_sigma(function, sigma, &result, Policy()))
    {
       return result;
@@ -171,33 +171,33 @@ inline RealType cdf(const rayleigh_distribution& dist, const R
 } // cdf

 template <class RealType, class Policy>
-inline RealType logcdf(const rayleigh_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType logcdf(const rayleigh_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

    RealType result = 0;
    RealType sigma = dist.sigma();
-   static const char* function = "boost::math::logcdf(const rayleigh_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::logcdf(const rayleigh_distribution<%1%>&, %1%)";
    if(false == detail::verify_sigma(function, sigma, &result, Policy()))
    {
-      return -std::numeric_limits<RealType>::infinity();
+      return -boost::math::numeric_limits<RealType>::infinity();
    }
    if(false == detail::verify_rayleigh_x(function, x, &result, Policy()))
    {
-      return -std::numeric_limits<RealType>::infinity();
+      return -boost::math::numeric_limits<RealType>::infinity();
    }

    result = log1p(-exp(-x * x / ( 2 * sigma * sigma)), Policy());
    return result;
 } // logcdf

 template <class RealType, class Policy>
-inline RealType quantile(const rayleigh_distribution<RealType, Policy>& dist, const RealType& p)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const rayleigh_distribution<RealType, Policy>& dist, const RealType& p)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

    RealType result = 0;
    RealType sigma = dist.sigma();
-   static const char* function = "boost::math::quantile(const rayleigh_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const rayleigh_distribution<%1%>&, %1%)";
    if(false == detail::verify_sigma(function, sigma, &result, Policy()))
       return result;
    if(false == detail::check_probability(function, p, &result, Policy()))
@@ -216,13 +216,13 @@ inline RealType quantile(const rayleigh_distribution& dist, co
 } // quantile

 template <class RealType, class Policy>
-inline RealType cdf(const complemented2_type<rayleigh_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<rayleigh_distribution<RealType, Policy>, RealType>& c)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

    RealType result = 0;
    RealType sigma = c.dist.sigma();
-   static const char* function = "boost::math::cdf(const rayleigh_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(const rayleigh_distribution<%1%>&, %1%)";
    if(false == detail::verify_sigma(function, sigma, &result, Policy()))
    {
       return result;
@@ -241,21 +241,21 @@ inline RealType cdf(const complemented2_type

 template <class RealType, class Policy>
-inline RealType logcdf(const complemented2_type<rayleigh_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType logcdf(const complemented2_type<rayleigh_distribution<RealType, Policy>, RealType>& c)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

    RealType result = 0;
    RealType sigma = c.dist.sigma();
-   static const char* function = "boost::math::logcdf(const rayleigh_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::logcdf(const rayleigh_distribution<%1%>&, %1%)";
    if(false == detail::verify_sigma(function, sigma, &result, Policy()))
    {
-      return -std::numeric_limits<RealType>::infinity();
+      return -boost::math::numeric_limits<RealType>::infinity();
    }
    RealType x = c.param;
    if(false == detail::verify_rayleigh_x(function, x, &result, Policy()))
    {
-      return -std::numeric_limits<RealType>::infinity();
+      return -boost::math::numeric_limits<RealType>::infinity();
    }
    RealType ea = x * x / (2 * sigma * sigma);
    // Fix for VC11/12 x64 bug in exp(float):
@@ -266,13 +266,13 @@ inline RealType logcdf(const complemented2_type

 template <class RealType, class Policy>
-inline RealType quantile(const complemented2_type<rayleigh_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<rayleigh_distribution<RealType, Policy>, RealType>& c)
 {
    BOOST_MATH_STD_USING // for ADL of std functions, log & sqrt.

    RealType result = 0;
    RealType sigma = c.dist.sigma();
-   static const char* function = "boost::math::quantile(const rayleigh_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const rayleigh_distribution<%1%>&, %1%)";
    if(false == detail::verify_sigma(function, sigma, &result, Policy()))
    {
       return result;
@@ -295,11 +295,11 @@ inline RealType quantile(const complemented2_type

 template <class RealType, class Policy>
-inline RealType mean(const rayleigh_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mean(const rayleigh_distribution<RealType, Policy>& dist)
 {
    RealType result = 0;
    RealType sigma = dist.sigma();
-   static const char* function = "boost::math::mean(const rayleigh_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::mean(const rayleigh_distribution<%1%>&, %1%)";
    if(false == detail::verify_sigma(function, sigma, &result, Policy()))
    {
       return result;
@@ -309,11 +309,11 @@ inline RealType mean(const rayleigh_distribution& dist)
 } // mean

 template <class RealType, class Policy>
-inline RealType variance(const rayleigh_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType variance(const rayleigh_distribution<RealType, Policy>& dist)
 {
    RealType result = 0;
    RealType sigma = dist.sigma();
-   static const char* function = "boost::math::variance(const rayleigh_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::variance(const rayleigh_distribution<%1%>&, %1%)";
    if(false == detail::verify_sigma(function, sigma, &result, Policy()))
    {
       return result;
@@ -323,20 +323,20 @@ inline RealType variance(const rayleigh_distribution& dist)
 } // variance

 template <class RealType, class Policy>
-inline RealType mode(const rayleigh_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mode(const rayleigh_distribution<RealType, Policy>& dist)
 {
    return dist.sigma();
 }

 template <class RealType, class Policy>
-inline RealType median(const rayleigh_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType median(const rayleigh_distribution<RealType, Policy>& dist)
 {
    using boost::math::constants::root_ln_four;
    return root_ln_four<RealType>() * dist.sigma();
 }

 template <class RealType, class Policy>
-inline RealType skewness(const rayleigh_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const rayleigh_distribution<RealType, Policy>& /*dist*/)
 {
    return static_cast<RealType>(0.63111065781893713819189935154422777984404221106391L);
    // Computed using NTL at 150 bit, about 50 decimal digits.
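For the Rayleigh distribution the complemented logcdf above reduces to exactly
-x*x / (2*sigma*sigma). A minimal host-side check, assuming a Boost.Math recent
enough to provide the logcdf free function (illustrative, not part of this patch):

   #include <boost/math/distributions/rayleigh.hpp>
   #include <cassert>
   #include <cmath>

   int main()
   {
      boost::math::rayleigh_distribution<double> r(2.0);
      double x = 3.0;
      double lsf = logcdf(complement(r, x)); // log of the survival function
      assert(std::abs(lsf - (-x * x / 8.0)) < 1e-12);
      return 0;
   }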
@@ -344,7 +344,7 @@ inline RealType skewness(const rayleigh_distribution& /*dist*/
 }

 template <class RealType, class Policy>
-inline RealType kurtosis(const rayleigh_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const rayleigh_distribution<RealType, Policy>& /*dist*/)
 {
    return static_cast<RealType>(3.2450893006876380628486604106197544154170667057995L);
    // Computed using NTL at 150 bit, about 50 decimal digits.
@@ -352,7 +352,7 @@ inline RealType kurtosis(const rayleigh_distribution& /*dist*/
 }

 template <class RealType, class Policy>
-inline RealType kurtosis_excess(const rayleigh_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const rayleigh_distribution<RealType, Policy>& /*dist*/)
 {
    return static_cast<RealType>(0.2450893006876380628486604106197544154170667057995L);
    // Computed using NTL at 150 bit, about 50 decimal digits.
@@ -360,9 +360,9 @@ inline RealType kurtosis_excess(const rayleigh_distribution& /
 } // kurtosis_excess

 template <class RealType, class Policy>
-inline RealType entropy(const rayleigh_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType entropy(const rayleigh_distribution<RealType, Policy>& dist)
 {
-   using std::log;
+   BOOST_MATH_STD_USING
    return 1 + log(dist.sigma()*constants::one_div_root_two<RealType>()) + constants::euler<RealType>()/2;
 }

diff --git a/include/boost/math/distributions/saspoint5.hpp b/include/boost/math/distributions/saspoint5.hpp
new file mode 100644
index 0000000000..7846b99560
--- /dev/null
+++ b/include/boost/math/distributions/saspoint5.hpp
@@ -0,0 +1,2796 @@
+// Copyright Takuma Yoshimura 2024.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_STATS_SASPOINT5_HPP
+#define BOOST_STATS_SASPOINT5_HPP
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4127) // conditional expression is constant
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifndef BOOST_MATH_HAS_NVRTC
+#include
+#include
+#endif
+
+namespace boost { namespace math {
+
+template <typename RealType, typename Policy>
+class saspoint5_distribution;
+
+namespace detail {
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_pdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 0.125) {
+        // Rational Approximation
+        // Maximum Relative Error: 7.8747e-17
+        BOOST_MATH_STATIC const RealType P[13] = {
+            static_cast<RealType>(6.36619772367581343076e-1),
+            static_cast<RealType>(2.17275699713513462507e2),
+            static_cast<RealType>(3.49063163361344578910e4),
+            static_cast<RealType>(3.40332906932698464252e6),
+            static_cast<RealType>(2.19485577044357440949e8),
+            static_cast<RealType>(9.66086435948730562464e9),
+            static_cast<RealType>(2.90571833690383003932e11),
+            static_cast<RealType>(5.83089315593106044683e12),
+            static_cast<RealType>(7.37911022713775715766e13),
+            static_cast<RealType>(5.26757196603002476852e14),
+            static_cast<RealType>(1.75780353683063527570e15),
+            static_cast<RealType>(1.85883041942144306222e15),
+            static_cast<RealType>(4.19828222275972713819e14),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(3.41295871011779138155e2),
+            static_cast<RealType>(5.48907134827349102297e4),
+            static_cast<RealType>(5.36641455324410261980e6),
+            static_cast<RealType>(3.48045461004960397915e8),
+            static_cast<RealType>(1.54920747349701741537e10),
+            static_cast<RealType>(4.76490595358644532404e11),
+            static_cast<RealType>(1.00104823128402735005e13),
+            static_cast<RealType>(1.39703522470411802507e14),
+            static_cast<RealType>(1.23724881334160220266e15),
+            static_cast<RealType>(6.47437580921138359461e15),
+            static_cast<RealType>(1.77627318260037604066e16),
+
static_cast(2.04792815832538146160e16), + static_cast(7.45102534638640681964e15), + static_cast(3.68496090049571174527e14), + }; + + result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x); + } + else if (x < 0.25) { + RealType t = x - static_cast (0.125); + + // Rational Approximation + // Maximum Relative Error: 2.1471e-17 + BOOST_MATH_STATIC const RealType P[7] = { + static_cast(4.35668401768623200524e-1), + static_cast(7.12477357389655327116e0), + static_cast(4.02466317948738993787e1), + static_cast(9.04888497628205955839e1), + static_cast(7.56175387288619211460e1), + static_cast(1.26950253999694502457e1), + static_cast(-6.59304802132933325219e-1), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.98623818041545101115e1), + static_cast(1.52856383017632616759e2), + static_cast(5.70706902111659740041e2), + static_cast(1.06454927680197927878e3), + static_cast(9.13160352749764887791e2), + static_cast(2.58872466837209126618e2), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 0.5) { + RealType t = x - static_cast (0.25); + + // Rational Approximation + // Maximum Relative Error: 5.3265e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(2.95645445681747568732e-1), + static_cast(2.23779537590791610124e0), + static_cast(5.01302198171248036052e0), + static_cast(2.76363131116340641935e0), + static_cast(1.18134858311074670327e-1), + static_cast(2.00287083462139382715e-2), + static_cast(-7.53979800555375661516e-3), + static_cast(1.37294648777729527395e-3), + }; + BOOST_MATH_STATIC const RealType Q[6] = { + static_cast(1.), + static_cast(1.02879626214781666701e1), + static_cast(3.85125274509784615691e1), + static_cast(6.18474367367800231625e1), + static_cast(3.77100050087302476029e1), + static_cast(5.41866360740066443656e0), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 1) { + RealType t = x - static_cast (0.5); + + // Rational Approximation + // Maximum Relative Error: 2.7947e-17 + BOOST_MATH_STATIC const RealType P[7] = { + static_cast(1.70762401725206223811e-1), + static_cast(8.43343631021918972436e-1), + static_cast(1.39703819152564365627e0), + static_cast(8.75843324574692085009e-1), + static_cast(1.86199552443747562584e-1), + static_cast(7.35858280181579907616e-3), + static_cast(-1.03693607694266081126e-4), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(6.73363440952557318819e0), + static_cast(1.74288966619209299976e1), + static_cast(2.15943268035083671893e1), + static_cast(1.29818726981381859879e1), + static_cast(3.40707211426946022041e0), + static_cast(2.80229012541729457678e-1), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 2) { + RealType t = x - 1; + + // Rational Approximation + // Maximum Relative Error: 1.7051e-18 + BOOST_MATH_STATIC const RealType P[7] = { + static_cast(8.61071469126041183247e-2), + static_cast(1.69689585946245345838e-1), + static_cast(1.09494833291892212033e-1), + static_cast(2.76619622453130604637e-2), + static_cast(2.44972748006913061509e-3), + static_cast(4.09853605772288438003e-5), + static_cast(-2.63561415158954865283e-7), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(3.04082856018856244947e0), + static_cast(3.52558663323956252986e0), + static_cast(1.94795523079701426332e0), + static_cast(5.23956733400745421623e-1), + 
static_cast(6.19453597593998871667e-2), + static_cast(2.31061984192347753499e-3), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 4) { + RealType t = x - 2; + + // Rational Approximation + // Maximum Relative Error: 2.9247e-17 + BOOST_MATH_STATIC const RealType P[7] = { + static_cast(3.91428580496513429479e-2), + static_cast(4.07162484034780126757e-2), + static_cast(1.43342733342753081931e-2), + static_cast(2.01622178115394696215e-3), + static_cast(1.00648013467757737201e-4), + static_cast(9.51545046750892356441e-7), + static_cast(-3.56598940936439037087e-9), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.63904431617187026619e0), + static_cast(1.03812003196677309121e0), + static_cast(3.18144310790210668797e-1), + static_cast(4.81930155615666517263e-2), + static_cast(3.25435391589941361778e-3), + static_cast(7.01626957128181647457e-5), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 8) { + RealType t = x - 4; + + // Rational Approximation + // Maximum Relative Error: 2.6547e-17 + BOOST_MATH_STATIC const RealType P[7] = { + static_cast(1.65057384221262866484e-2), + static_cast(8.05429762031495873704e-3), + static_cast(1.35249234647852784985e-3), + static_cast(9.18685252682786794440e-5), + static_cast(2.23447790937806602674e-6), + static_cast(1.03176916111395079569e-8), + static_cast(-1.94913182592441292094e-11), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(8.10113554189626079232e-1), + static_cast(2.54175325409968367580e-1), + static_cast(3.87119072807894983910e-2), + static_cast(2.92520770162792443587e-3), + static_cast(9.89094130526684467420e-5), + static_cast(1.07148513311070719488e-6), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 16) { + RealType t = x - 8; + + // Rational Approximation + // Maximum Relative Error: 2.5484e-17 + BOOST_MATH_STATIC const RealType P[7] = { + static_cast(6.60044810497290557553e-3), + static_cast(1.59342644994950292031e-3), + static_cast(1.32429706922966110874e-4), + static_cast(4.45378136978435909660e-6), + static_cast(5.36409958111394628239e-8), + static_cast(1.22293787679910067873e-10), + static_cast(-1.16300443044165216564e-13), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(4.10446485803039594111e-1), + static_cast(6.51887342399859289520e-2), + static_cast(5.02151225308643905366e-3), + static_cast(1.91741179639551137839e-4), + static_cast(3.27316600311598190022e-6), + static_cast(1.78840301213102212857e-8), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 32) { + RealType t = x - 16; + + // Rational Approximation + // Maximum Relative Error: 2.9866e-17 + BOOST_MATH_STATIC const RealType P[7] = { + static_cast(2.54339461777955741686e-3), + static_cast(3.10069525357852579756e-4), + static_cast(1.30082682796085732756e-5), + static_cast(2.20715868479255585050e-7), + static_cast(1.33996659756026452288e-9), + static_cast(1.53505360463827994365e-12), + static_cast(-7.42649416356965421308e-16), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(2.09203384450859785642e-1), + static_cast(1.69422626897631306130e-2), + static_cast(6.65649059670689720386e-4), + static_cast(1.29654785666009849481e-5), + static_cast(1.12886139474560969619e-7), + static_cast(3.14420104899170413840e-10), + }; + + result = 
tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 64) { + RealType t = x - 32; + + // Rational Approximation + // Maximum Relative Error: 3.3581e-17 + BOOST_MATH_STATIC const RealType P[7] = { + static_cast(9.55085695067883584460e-4), + static_cast(5.86125496733202756668e-5), + static_cast(1.23753971325810931282e-6), + static_cast(1.05643819745933041408e-8), + static_cast(3.22502949410095015524e-11), + static_cast(1.85366144680157942079e-14), + static_cast(-4.53975807317403152058e-18), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.05980850386474826374e-1), + static_cast(4.34966042652000070674e-3), + static_cast(8.66341538387446465700e-5), + static_cast(8.55608082202236124363e-7), + static_cast(3.77719968378509293354e-9), + static_cast(5.33287361559571716670e-12), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else { + RealType t = 1 / sqrt(x); + + // Rational Approximation + // Maximum Relative Error: 4.7450e-19 + BOOST_MATH_STATIC const RealType P[5] = { + static_cast(1.99471140200716338970e-1), + static_cast(-1.93310094131437487158e-2), + static_cast(-8.44282614309073196195e-3), + static_cast(3.47296024282356038069e-3), + static_cast(-4.05398011689821941383e-4), + }; + BOOST_MATH_STATIC const RealType Q[5] = { + static_cast(1.), + static_cast(7.00973251258577238892e-1), + static_cast(2.66969681258835723157e-1), + static_cast(5.51785147503612200456e-2), + static_cast(6.50130030979966274341e-3), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t / x; + } + + return result; +} + + +template +BOOST_MATH_GPU_ENABLED inline RealType saspoint5_pdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant&) +{ + BOOST_MATH_STD_USING + RealType result; + + if (x < 0.0625) { + // Rational Approximation + // Maximum Relative Error: 8.8841e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[27] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36619772367581343075535053490057448138e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.57459506929453385798277946154823008327e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.46717322844023441698710451505816706570e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71501459971530549476153273173061194095e8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.76700973495278431084530045707075552432e10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01328150775099946510145440412520620021e13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70028222513668830210058353057559790101e15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29641781943744384078006991488193839955e17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.52611994112742436432957758588495082163e19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.27833177267552931459542318826727288124e21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.68946162731840551853993619351896931533e23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02965010233956763504899745874128908220e25), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.14128569264874914146628076133997950655e26), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.09103580386900060922163883603492216942e28), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.86778299087452621293332172137014749128e29), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.80029712249744334924217328667885673985e31), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.70890080432228368476255091774238573277e32), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
5.88600513999992354909078399482884993261e33), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.01189178534848836605739139176681647755e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.06531475170803043941021113424602440078e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.64956999370443524098457423629252855270e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44276098283517934229787916584447559248e37), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.45856704224433991524661028965741649584e37), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.47263237190968408624388275549716907309e37), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.66186300951901408251743228798832386260e37), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.48064966533519934186356663849904556319e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.64877082086372991309408001661535573441e35), + }; + BOOST_MATH_STATIC const RealType Q[28] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18981461118065892086304195732751798634e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.01761929839041982958990681130944341399e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.69465252239913021973760046507387620537e8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.49221044103838155300076098325950584061e10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59327386289821190042576978177896481082e13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.67528179803224728786405503232064643870e15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.61672367849271591791062829736720884633e17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.98395893917909208201801908435620016552e19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.60025693881358827551113845076726845495e21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.67730578745705562356709169493821118109e23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63843883526710042156562706339553092312e25), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.23075214698024188140971761421762265880e26), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.37775321923937393366376907114580842429e28), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12444724625354796650300159037364355605e30), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.00860835602766063447009568106012449767e31), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.39614080159468893509273006948526469708e32), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06547095715472468415058181351212520255e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36755997709303811764051969789337337957e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32519530489892818585066019217287415587e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.45230390606834183602522256278256501404e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.80344475131699029428900627020022801971e37), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66469314795307459840482483320814279444e38), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.70209065673736156218117594311801487932e38), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.84490531246108754748100009460860427732e38), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.30215083398643966091721732133851539475e38), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.58032845332990262754766784625271262271e37), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.50461648438613634025964361513066059697e36), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x); + } + else if (x < 0.125) { + RealType t = x - static_cast (0.0625); + + // Rational Approximation + // Maximum Relative Error: 3.4585e-35 + // LCOV_EXCL_START + 
BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.46416716200748206779925127900698754119e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.41771273526123373239570033672829787791e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.56142610225585235535211648703534340871e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.15655694129872563686497490176725921724e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.00791883661952751945853742455643714995e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.30252591667828615354689186280704562254e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76168115448224677276551213052798322583e7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.88534624532179841393387625270218172719e7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14740447137831585842166880265350244623e8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14904082614021239315925958812100948136e8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.76866867279164114004579652405104553404e7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.53475339598769347326916978463911377965e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.88896160275915786487519266368539625326e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05543800791717482823610940401201712196e4), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.44407579416524903840331499438398472639e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.15911780811299460009161345260146251462e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.88457596285725454686358792906273558406e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.66501639812506059997744549411633476528e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.12674134216028769532305433586266118000e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.87676063477990584593444083577765264392e7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.56084282739608760299329382263598821653e8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.34250986378665047914811630036201995871e8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31288233106689286803200674021353188597e9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33621494302241474082474689597125896975e9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.63379428046258653791600947328520263412e8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14558538557562267533922961110917101850e8), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 0.25) { + RealType t = x - static_cast (0.125); + + // Rational Approximation + // Maximum Relative Error: 6.9278e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.35668401768623200524372663239480799018e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30066509937988171489091367354416214000e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.05924026744937322690717755156090122074e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.12998524955326375684693500551926325112e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.52237930808361186011042950178715609183e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.10734809597587633852077152938985998879e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.20796157836149826988172603622242119074e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12398478061053302537736799402801934778e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17841330491647012385157454335820786724e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.46281413765362795389526259057436151953e5), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52220357379402116641048490644093497829e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.51130316105543847380510577656570543736e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.32201781975497810173532067354797097401e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.96874547436310030183519174847668703774e0), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.63164311578114868477819520857286165076e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34964379844144961683927306966955217328e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.82966031793809959278519002412667883288e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.56215285850856046267451500310816276675e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.81046679663412610005501878092824281161e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.33868038251479411246071640628518434659e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.46262495881941625571640264458627940579e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40052628730443097561652737049917920495e7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44394803828297754346261138417756941544e7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.56647617803506258343236509255155360957e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.53513095899009948733175317927025056561e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.69130675750530663088963759279778748696e5), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 0.5) { + RealType t = x - static_cast (0.25); + + // Rational Approximation + // Maximum Relative Error: 6.9378e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.95645445681747568731488283573032414811e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.83246437763964151893665752064650172391e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.85333417559435252576820440080930004674e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.90974714199542064991001365628659054084e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39707205668285805800884524044738261436e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.24814598419826565698241508792385416075e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.95012897118808793886195172068123345314e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.87265743900139300849404272909665705025e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.98795164648056126707212245325405968413e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07012128790318535418330629467906917213e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99797198893523173981812955075412130913e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.55029227544167913873724286459253168886e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.54064889901609722583601330171719819660e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.72254289950537680833853394958874977464e-3), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.58291085070053442257438623486099473087e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.95618461039379226195473938654286975682e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.97427161745150579714266897556974326502e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.52730436681412535198281529590508861106e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.49521185356761585062135933350225236726e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
5.03881178612341724262911142022761966061e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.02360046338629039644581819847209730553e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.65580339066083507998465454599272345735e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.15462499626138125314518636645472893045e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.61951767959774678843021179589300545717e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.60745557054877240279811529503888551492e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.91061555870569579915258835459255406575e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43045229010040855016672246098687100063e1), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 1) { + RealType t = x - static_cast (0.5); + + // Rational Approximation + // Maximum Relative Error: 6.4363e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70762401725206223811383500786268939645e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19353011456197635663058525904929358535e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22974648900600015961253465796487372402e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.91951696059042324975935209295355569292e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.79039444119906169910281912009369164227e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00089963120992100860902142265631127046e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.37108883429306700857182028809960789020e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.49586566873564432788366931251358248417e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.49790521605774884174840168128255220471e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.90660338063979435668763608259382712726e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.93409982383888149064797608605579930804e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22802459215932860445033185874876812040e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07739227340181463034286653569468171767e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.02669738424010290973023004028523684766e-7), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46404480283267324138113869370306506431e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.54550184643308468933661600211579108422e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.63602410602063476726031476852965502123e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.94463479638213888403144706176973026333e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48607087483870766806529883069123352339e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.69715692924508994524755312953665710218e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33237849965272853370191827043868842100e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.08460086451666825383009487734769646087e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.47365552394788536087148438788608689300e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.38010282703940184371247559455167674975e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.67219842525655806370702248122668214685e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.01852843874982199859775136086676841910e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.14767043526088185802569803397824432028e-3), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 2) { + RealType t = x - 1; + + // Rational 
Approximation + // Maximum Relative Error: 9.1244e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.61071469126041183247373313827161939454e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.35837460186564880289965856498718321896e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.47783071967681246738651796742079530382e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16019502727107539284403003943433359877e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.80510046274709592896987229782879937271e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.30456542768955299533391113704078540955e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36539167913428133313942008990965988621e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.76450743657913389896743235938695682829e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.42847090205575096649865021874905747106e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41380341540026027117735179862124402398e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.40549721587212773424211923602910622515e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09089653391032945883918434200567278139e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21403900721572475664926557233205232491e-10), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.13172035933794917563324458011617112124e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.65687100738157412154132860910003018338e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.59672433683883998168388916533196510994e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.61469557815097583209668778301921207455e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.77070955301136405523492329700943077340e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.20825570431301943907348077675777546304e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.60136197167727810483751794121979805142e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.53723076053642006159503073104152703814e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.63397465217490984394478518334313362490e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.40577918603319523990542237990107206371e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.94376458316662573143947719026985667328e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.09333568224541559157192543410988474886e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.59947287428695057506683902409023760438e-8), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 4) { + RealType t = x - 2; + + // Rational Approximation + // Maximum Relative Error: 8.1110e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.91428580496513429479068747515164587814e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.69015019070193436467106672180804948494e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.03147451266231819912643754579290008651e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.18825170881552297150779588545792258740e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30548850262278582401286533053286406505e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.54315108501815531776138839512564427279e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.66434584176931077662201101557716482514e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.66158632576958238392567355014249971287e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.31365802206301246598393821671437863818e-6), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85378389166807263837732376845556856416e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.20375363151456683883984823721339648679e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06637401794693307359898089790558771957e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.08663671047376684678494625068451888284e-14), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.07443397096591141329212291707948432414e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.16665031056584124503224711639009530348e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.27666060511630720485121299731204403783e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.65646979169107732387032821262953301311e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.63594064986880863092994744424349361396e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31360114173642293100378020953197965181e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.09489929949457075237756409511944811481e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.24574519309785870806550506199124944514e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.56048486483867679310086683710523566607e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.60417286783794818094722636906776809193e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.53154117367296710469692755461431646999e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60041713691072903334637560080298818163e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.77381528950794767694352468734042252745e-12), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 8) { + RealType t = x - 4; + + // Rational Approximation + // Maximum Relative Error: 2.5228e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.65057384221262866484014802392420311075e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92801687242885330588201777283015178448e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.65508815862861196424333614846876229064e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.71545573465295958468808641544341412235e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72077718130407940498710469661947719216e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.26299620525538984108147098966692839348e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.77971404992990847565880351976461271350e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71176235845517643695464740679643640241e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.64603919225244695533557520384631958897e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.85274347406803894317891882905083368489e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.48564096627181435612831469651920186491e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.90886715044580341917806394089282500340e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.39396206221935864416563232680283312796e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.37760675743046300528308203869876086823e-22), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.49023284463742780238035958819642738891e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.76284367953836866133894756472541395734e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.69932155343422362573146811195224195135e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.97593541520549770519034085640975455763e-2), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45862809001322359249894968573830094537e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.61348135835522976885804369721316193713e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21069949470458047530981551232427019037e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.03132437580490629136144285669590192597e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.91030348024641585284338958059030520141e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.56320479309161046934628280237629402373e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.39524198476052364627683067034422502163e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18666081063885228839052386515073873844e-13), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 16) { + RealType t = x - 8; + + // Rational Approximation + // Maximum Relative Error: 9.6732e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.60044810497290557552736366450372523266e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.27034183360438185616541260923634443241e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19813403884333707962156711479716066536e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.91346554854771687970018076643044998737e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.91975837766081548424458764226669789039e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26031304514411902758114277797443618334e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.47127194811140370123712253347211626753e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.55248861254135821097921903190564312000e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78340847719683652633864722047250151066e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.95888612422041337572422846394029849086e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66363005792960308636467394552324255493e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.93244800648299424751906591077496534948e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.95046217952146113063614290717113024410e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.46784746963816915795587433372284530785e-25), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.16012189991825507132967712656930682478e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.95202772611563835130347051925062280272e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.23801477561401113332870463345197159418e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.54022665579711946784722766000062263305e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.94266182294627770206082679848878391116e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.11782839184878848480753630961211685630e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.28827067686094594197542725283923947812e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.00220719177374237332018587370837457299e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.42250513143925626748132661121749401409e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.82007216963767723991309138907689681422e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34214834652884406013489167210936679359e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.85519293212465087373898447546710143008e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.96728437809303144188312623363453475831e-19), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + 
else if (x < 32) { + RealType t = x - 16; + + // Rational Approximation + // Maximum Relative Error: 1.0113e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.54339461777955741686401041938275102207e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.17747085249877439037826121862689145081e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14104576580586095462211756659036062930e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.06903778663262313120049231822412184382e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.53115958954246158081703822428768781010e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48225007017630665357941682179157662142e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.20810829523286181556951002345409843125e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.54070972719909957155251432996372246019e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.06258623970363729581390609798632080752e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.15603641527498625694677136504611545743e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.05376970060354261667000502105893106009e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.14727542705613448694396750352455931731e-22), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76883960167449461476228984331517762578e-25), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.03558202009465610972808653993060437679e-29), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.08809672969012756295937194823378109391e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.41148083436617376855422685448827300528e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.85101541143091590863368934606849033688e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.38984899982960112626157576750593711628e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.51437845497783812562009857096371643785e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.12891276596072815764119699444334380521e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.82412500887161687329929693518498698716e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.80215715026891688444965605768621763721e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.85838684678780184082810752634454259831e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83675729736846176693608812315852523556e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.80347165008408134158968403924819637224e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23639219622240634094606955067799349447e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.63446235885036169537726818244420509024e-23), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 64) { + RealType t = x - 32; + + // Rational Approximation + // Maximum Relative Error: 9.7056e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.55085695067883584460317653567009454037e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52919532248638251721278667010429548877e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06266842295477991789450356745903177571e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.20671609948319334255323512011575892813e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04692714549374449244320605137676408001e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70605481454469287545965803970738264158e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83960996572005209177458712170004097587e-12), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29732261733491885750067029092181853751e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.78385693918239619309147428897790440735e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52969197316398995616879018998891661712e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.14063120299947677255281707434419044806e-22), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.25957675329657493245893497219459256248e-25), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.55238112862817593053765898004447484717e-29), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.93970406521541790658675747195982964585e-34), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.04722757068068234153968603374387493579e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85854131835804458353300285777969427206e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.85809281481040288085436275150792074968e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.38860750164285700051427698379841626305e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.91463283601681120487987016215594255423e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28104952818420195583669572450494959042e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43912720109615655035554724090181888734e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10668954229813492117417896681856998595e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.65093571330749369067212003571435698558e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.81758227619561958470583781325371429458e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.36970757752002915423191164330598255294e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.06487673393164724939989217811068656932e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.47121057452822097779067717258050172115e-27), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else { + RealType t = 1 / sqrt(x); + + // Rational Approximation + // Maximum Relative Error: 7.1032e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[8] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99471140200716338969973029967190934238e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.82846732476244747063962056024672844211e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.69724475658159099827638225237895868258e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21259630917863228526439367416146293173e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.13469812721679130825429547254346177005e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.73237434182338329541631611908947123606e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72986150007117100707304201395140411630e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.53567129749337040254350979652515879881e-7), + }; + BOOST_MATH_STATIC const RealType Q[9] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.89815449697874475254942178935516387239e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.21223228867921988134838870379132038419e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.79514417558927397512722128659468888701e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.43331254539687594239741585764730095049e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.99078779616201786316256750758748178864e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04590833634768023225748107112347131311e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.17497990182339853998751740288392648984e-5), + 
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.53420609011698705803549938558385779137e-6),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t / x;
+    }
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_pdf_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>& tag) {
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    return saspoint5_pdf_plus_imp_prec<RealType>(abs(x), tag);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_pdf_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>& tag) {
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    return saspoint5_pdf_plus_imp_prec<RealType>(abs(x), tag);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_pdf_imp(const saspoint5_distribution<RealType, Policy>& dist, const RealType& x) {
+    //
+    // This calculates the pdf of the Saspoint5 distribution.
+    //
+
+    BOOST_MATH_STD_USING // for ADL of std functions
+    constexpr auto function = "boost::math::pdf(saspoint5<%1%>&, %1%)";
+    RealType result = 0;
+    RealType location = dist.location();
+    RealType scale = dist.scale();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_x(function, x, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The SaS point5 distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128-bit quad-floats)");
+
+    RealType u = (x - location) / scale;
+
+    result = saspoint5_pdf_imp_prec<RealType>(u, tag_type()) / scale;
+
+    return result;
+}
+
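For context, a minimal usage sketch of the density entry point above. This is not part of the patch; it assumes only the public saspoint5_distribution constructor taking (location, scale) and the standard Boost.Math non-member pdf():

    #include <boost/math/distributions/saspoint5.hpp>
    #include <iostream>

    int main() {
        // Symmetric alpha-stable distribution with alpha = 1/2,
        // location 0 and scale 1 (assumed constructor arguments).
        boost::math::saspoint5_distribution<double> dist(0.0, 1.0);
        // pdf() forwards to saspoint5_pdf_imp above, which selects the
        // 53- or 113-bit rational approximation via tag dispatch.
        std::cout << boost::math::pdf(dist, 1.0) << '\n';
    }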
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_cdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 0.5) {
+        // Rational Approximation
+        // Maximum Relative Error: 2.6225e-17
+        BOOST_MATH_STATIC const RealType P[16] = {
+            static_cast<RealType>(5.0e-1),
+            static_cast<RealType>(1.11530082549581486148e2),
+            static_cast<RealType>(1.18564167533523512811e4),
+            static_cast<RealType>(7.51503793077701705413e5),
+            static_cast<RealType>(3.05648233678438482191e7),
+            static_cast<RealType>(8.12176734530090957088e8),
+            static_cast<RealType>(1.39533182836234507573e10),
+            static_cast<RealType>(1.50394359286077974212e11),
+            static_cast<RealType>(9.79057903542935575811e11),
+            static_cast<RealType>(3.73800992855150140014e12),
+            static_cast<RealType>(8.12697090329432868343e12),
+            static_cast<RealType>(9.63154058643818290870e12),
+            static_cast<RealType>(5.77714904017642642181e12),
+            static_cast<RealType>(1.53321958252091815685e12),
+            static_cast<RealType>(1.36220966258718212359e11),
+            static_cast<RealType>(1.70766655065405022702e9),
+        };
+        BOOST_MATH_STATIC const RealType Q[16] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(2.24333404643898143947e2),
+            static_cast<RealType>(2.39984636687021023600e4),
+            static_cast<RealType>(1.53353791432086858132e6),
+            static_cast<RealType>(6.30764952479861776476e7),
+            static_cast<RealType>(1.70405769169309597488e9),
+            static_cast<RealType>(3.00381227010195289341e10),
+            static_cast<RealType>(3.37519046677507392667e11),
+            static_cast<RealType>(2.35001610518109063314e12),
+            static_cast<RealType>(9.90961948200767679416e12),
+            static_cast<RealType>(2.47066673978544828258e13),
+            static_cast<RealType>(3.51442593932882610556e13),
+            static_cast<RealType>(2.68891431106117733130e13),
+            static_cast<RealType>(9.99723484253582494535e12),
+            static_cast<RealType>(1.49190229409236772612e12),
+            static_cast<RealType>(5.68752980146893975323e10),
+        };
+
+        result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x);
+    }
+    else if (x < 1) {
+        RealType t = x - static_cast<RealType>(0.5);
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.2135e-19
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(3.31309550000758082456e-1),
+            static_cast<RealType>(1.63012162307622129396e0),
+            static_cast<RealType>(2.97763161467248770571e0),
+            static_cast<RealType>(2.49277948739575294031e0),
+            static_cast<RealType>(9.49619262302649586821e-1),
+            static_cast<RealType>(1.38360148984087584165e-1),
+            static_cast<RealType>(4.00812864075652334798e-3),
+            static_cast<RealType>(-4.82051978765960490940e-5),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(5.43565383128046471592e0),
+            static_cast<RealType>(1.13265160672130133152e1),
+            static_cast<RealType>(1.13352316246726435292e1),
+            static_cast<RealType>(5.56671465170409694873e0),
+            static_cast<RealType>(1.21011708389501479550e0),
+            static_cast<RealType>(8.34618282872428849500e-2),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 2) {
+        RealType t = x - 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.4688e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(2.71280312689343248819e-1),
+            static_cast<RealType>(7.44610837974139249205e-1),
+            static_cast<RealType>(7.17844128359406982825e-1),
+            static_cast<RealType>(2.98789060945288850507e-1),
+            static_cast<RealType>(5.22747411439102272576e-2),
+            static_cast<RealType>(3.06447984437786430265e-3),
+            static_cast<RealType>(2.60407071021044908690e-5),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(3.06221257507188300824e0),
+            static_cast<RealType>(3.44827372231472308047e0),
+            static_cast<RealType>(1.78166113338930668519e0),
+            static_cast<RealType>(4.25580478492907232687e-1),
+            static_cast<RealType>(4.09983847731128510426e-2),
+            static_cast<RealType>(1.04343172183467651240e-3),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 4) {
+        RealType t = x - 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 8.2289e-18
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(2.13928162275383716645e-1),
+            static_cast<RealType>(2.35139109235828185307e-1),
+            static_cast<RealType>(9.35967515134932733243e-2),
+            static_cast<RealType>(1.64310489592753858417e-2),
+            static_cast<RealType>(1.23186728989215889119e-3),
+            static_cast<RealType>(3.13500969261032539402e-5),
+            static_cast<RealType>(1.17021346758965979212e-7),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(1.28212183177829510267e0),
+            static_cast<RealType>(6.17321009406850420793e-1),
+            static_cast<RealType>(1.38400318019319970893e-1),
+            static_cast<RealType>(1.44994794535896837497e-2),
+            static_cast<RealType>(6.17774446282546623636e-4),
+            static_cast<RealType>(7.00521050169239269819e-6),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 8) {
+        RealType t = x - 4;
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.7284e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(1.63772802979087193656e-1),
+            static_cast<RealType>(9.69009603942214234119e-2),
+            static_cast<RealType>(2.08261725719828138744e-2),
+            static_cast<RealType>(1.97965182693146960970e-3),
+            static_cast<RealType>(8.05499273532204276894e-5),
+            static_cast<RealType>(1.11401971145777879684e-6),
+            static_cast<RealType>(2.25932082770588727842e-9),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(6.92463563872865541733e-1),
+            static_cast<RealType>(1.80720987166755982366e-1),
+            static_cast<RealType>(2.20416647324531054557e-2),
+            static_cast<RealType>(1.26052070140663063778e-3),
+            static_cast<RealType>(2.93967534265875431639e-5),
+            static_cast<RealType>(1.82706995042259549615e-7),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 16) {
+        RealType t = x - 8;
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.9609e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(1.22610122564874280532e-1),
+            static_cast<RealType>(3.70273222121572231593e-2),
+            static_cast<RealType>(4.06083618461789591121e-3),
+            static_cast<RealType>(1.96898134215932126299e-4),
+            static_cast<RealType>(4.08421066512186972853e-6),
+            static_cast<RealType>(2.87707419853226244584e-8),
+            static_cast<RealType>(2.96850126180387702894e-11),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(3.55825191301363023576e-1),
+            static_cast<RealType>(4.77251766176046719729e-2),
+            static_cast<RealType>(2.99136605131226103925e-3),
+            static_cast<RealType>(8.78895785432321899939e-5),
+            static_cast<RealType>(1.05235770624006494709e-6),
+            static_cast<RealType>(3.35423877769913468556e-9),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 32) {
+        RealType t = x - 16;
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.6559e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(9.03056141356415077080e-2),
+            static_cast<RealType>(1.37568904417652631821e-2),
+            static_cast<RealType>(7.60947271383247418831e-4),
+            static_cast<RealType>(1.86048302967560067128e-5),
+            static_cast<RealType>(1.94537860496575427218e-7),
+            static_cast<RealType>(6.90524093915996283104e-10),
+            static_cast<RealType>(3.58808434477817122371e-13),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(1.80501347735272292079e-1),
+            static_cast<RealType>(1.22807958286146936376e-2),
+            static_cast<RealType>(3.90421541115275676253e-4),
+            static_cast<RealType>(5.81669449234915057779e-6),
+            static_cast<RealType>(3.53005415676201803667e-8),
+            static_cast<RealType>(5.69883025435873921433e-11),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 64) {
+        RealType t = x - 32;
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.0653e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(6.57333571766941474226e-2),
+            static_cast<RealType>(5.02795551798163084224e-3),
+            static_cast<RealType>(1.39633616037997111325e-4),
+            static_cast<RealType>(1.71386564634533872559e-6),
+            static_cast<RealType>(8.99508156357247137439e-9),
+            static_cast<RealType>(1.60229460572297160486e-11),
+            static_cast<RealType>(4.17711709622960498456e-15),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(9.10198637347368265508e-2),
+            static_cast<RealType>(3.12263472357578263712e-3),
+            static_cast<RealType>(5.00524795130325614005e-5),
+            static_cast<RealType>(3.75913188747149725195e-7),
+            static_cast<RealType>(1.14970132098893394023e-9),
+            static_cast<RealType>(9.34957119271300093120e-13),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        RealType t = 1 / sqrt(x);
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.0104e-20
+        BOOST_MATH_STATIC const RealType P[5] = {
+            static_cast<RealType>(3.98942280401432677940e-1),
+            static_cast<RealType>(8.12222388783621449146e-2),
+            static_cast<RealType>(1.68515703707271703934e-2),
+            static_cast<RealType>(2.19801627205374824460e-3),
+            static_cast<RealType>(-5.63321705854968264807e-5),
+        };
+        BOOST_MATH_STATIC const RealType Q[5] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(6.02536240902768558315e-1),
+            static_cast<RealType>(1.99284471400121092380e-1),
+            static_cast<RealType>(3.48012577961755452113e-2),
+            static_cast<RealType>(3.38545004473058881799e-3),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t;
+    }
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_cdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
RealType result; + + if (x < 0.125) { + // Rational Approximation + // Maximum Relative Error: 6.9340e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[30] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.0e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.25520067710293108163697513129883130648e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.70866020657515874782126804139443323023e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.00865235319309486225795793030882782077e7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.15226363537737769449645357346965170790e10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.90371247243851280277289046301838071764e12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.55124590509169425751300134399513503679e14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.31282020412787511681760982839078664474e16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.81134278666896523873256421982740565131e18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.36154530125229747305141034242362609073e20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.67793867640429875837167908549938345465e22), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34584264816825205490037614178084070903e24), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.52622279567059369718208827282730379468e25), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.84678324511679577282571711018484545185e27), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.99412564257799793932936828924325638617e28), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08467105431111959283045453636520222779e30), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.87466808926544728702827204697734995611e31), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.55020252231174414164534905191762212055e32), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.69582736077420504345389671165954321163e33), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.18203860972249826626461130638196586188e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32955733788770318392204091471121129386e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.97972270315674052071792562126668438695e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.93941537398987201071027348577636994465e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.40818708062034138095495206258366082481e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.76833406751769751643745383413977973530e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.67873467711368838525239991688791162617e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.94179310584115437584091984619858795365e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24348215908456320362232906012152922949e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71625432346533320597285660433110657670e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.54662474187354179772157464533408058525e33), + }; + BOOST_MATH_STATIC const RealType Q[31] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05231337496532137901354609636674085703e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.43071888317491317900094470796567113997e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.80864482202910830302921131771345102044e8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.32755297215862998181755216820621285536e10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.86251123527611073428156549377791985741e12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12025543961949466786297141758805461421e15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27681657695574252637426145112570596483e17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17850553865715973904162289375819555884e19), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.87285897504702686250962844939736867339e20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.46852796231948446334549476317560711795e22), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.76101689878844725930808096548998198853e24), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14017776727845251567032313915953239178e26), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.83738954971390158348334918235614003163e27), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04701345216121451992682705965658316871e29), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29994190638467725374533751141434904865e30), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.03334024242845994501493644478442360593e31), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.59094378123268840693978620156028975277e32), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.05630254163426327113368743426054256780e33), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.06008195534030444387061989883493342898e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.21262490304347036689874956206774563906e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52353024633841796119920505314785365242e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.28839143293381125956284415313626962263e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31074057704096457802547386358094338369e37), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.24762412200364040971704861346921094354e37), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.55663903116458425420509083471048286114e37), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.81839283802391753865642022579846918253e37), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.40026559327708207943879092058654410696e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48767474646810049293505781106444169229e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10591353097667671736865938428051885499e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26980708896893794012677171239610721832e33), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x); + } + else if (x < 0.25) { + RealType t = x - static_cast (0.125); + + // Rational Approximation + // Maximum Relative Error: 9.6106e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.31887921568009055676985827521151969069e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.62791448964529380666250180886203090183e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.18238045199893937316918299064825702894e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.77274519306540522227493503092956314136e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.89424638466340765479970877448972418958e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.84027004420207996285174223581748706097e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.84633134142285937075423713704784530853e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76780579189423063605715733542379494552e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19812409802969581112716039533798357401e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60039008588877024309600768114757310858e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.10268260529501421009222937882726290612e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.72169594688819848498039471657587836720e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.27181379647139697258984772894869505788e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.73617450590346508706222885401965820190e1), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.18558935411552146390814444666395959919e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.49210559503096368944407109881023223654e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.93959323596111340518285858313038058302e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.53590607436758691037825792660167970938e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.82700985983018132572589829602100319330e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.62137033935442506086127262036686905276e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.76014299715348555304267927238963139228e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.12336796972134088340556958396544477713e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.01952132024838508233050167059872220508e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.41846547214877387780832317250797043384e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.02083431572388097955901208994308271581e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.30401057171447074343957754855656724141e4), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 0.5) { + RealType t = x - static_cast (0.25); + + // Rational Approximation + // Maximum Relative Error: 3.1519e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.87119665000174806422420129219814467874e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.60769554551148293079169764245570645155e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.49181979810834706538329284478129952168e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.15722765491675871778645250624425739489e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.65973147084701923411221710174830072860e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.93709338011482232037110656459951914303e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.57393131299425403017769538642434714791e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.24110491141294379107651487490031694257e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23670394514211681515965192338544032862e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.06141024932329394052395469123628405389e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.08599362073145455095790192415468286304e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.22746783794652085925801188098270888502e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.19652234873414609727168969049557770989e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.73529976407853894192156335785920329181e-4), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.04157569499592889296640733909653747983e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.82248883130787159161541119440215325308e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.35191216924911901198168794737654512677e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.05099314677808235578577204150229855903e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.61081069236463123032873733048661305746e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.65340645555368229718826047069323437201e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.44526681322128674428653420882660351679e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.03963804195353853550682049993122898950e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.40399236835577953127465726826981753422e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.46764755170079991793106428011388637748e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
6.06384106042490712972156545051459068443e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.27614406724572981099586665536543423891e0), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 1) { + RealType t = x - static_cast (0.5); + + // Rational Approximation + // Maximum Relative Error: 7.1196e-37 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31309550000758082761278726632760756847e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.07242222531117199094690544171275415854e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.18286763141875580859241637334381199648e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.72102024869298528501604761974348686708e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31748399999514540052066169132819656757e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.72168003284748405703923567644025252608e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52648991506052496046447777354251378257e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.16777263528764704804758173026143295383e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.66044453196259367950849328889468385159e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.35095952392355288307377427145581700484e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43011308494452327007589069222668324337e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16582092138863383294685790744721021189e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.02261914949200575965813000131964695720e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.93943913630044161720796150617166047233e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.76395009419307902351328300308365369814e-8), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28073115520716780203055949058270715651e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.20245752585870752942356137496087189194e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.34548337034735803039553186623067144497e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.90925817267776213429724248532378895039e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.92883822651628140083115301005227577059e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.72868136219107985834601503784789993218e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.50498744791568911029110559017896701095e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.05178276667813671578581259848923964311e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.16880263792490095344135867620645018480e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16199396397514668672304602774610890666e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.25543193822942088303609988399416145281e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.62522294286034117189844614005500278984e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18342889744790118595835138444372660676e-3), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 2) { + RealType t = x - 1; + + // Rational Approximation + // Maximum Relative Error: 9.5605e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.71280312689343266367958859259591541365e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.49628636612698702680819948707479820292e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.61090930375686902075245639803646265081e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
5.01191924051756106307211298794294657688e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.42496510376427957390465373165464672088e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.59577769624139820954046058289100998534e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02664809521258420718170586857797408674e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72258711278476951299824066502536249701e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72578941800687566921553416498339481887e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.12553368488232553360765667155702324159e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.20749770911901442251726681861858323649e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00621121212654384864006297569770703900e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18976033102817074104109472578202752346e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22093548539863254922531707899658394458e-10), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.83305694892673455436552817409325835774e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.49922543669955056754932640312490112609e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.23488972536322019584648241457582608908e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.14051527527038669918848981363974859889e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37891280136777182304388426277537358346e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.08058775103864815769223385606687612117e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83305488980337433132332401784292281716e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.71072208215804671719811563659227630554e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.86332040813989094594982937011005305263e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.87698178237970337664105782546771501188e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.69113555019737313680732855691540088318e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31888539972217875242352157306613891243e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85633766164682554126992822326956560433e-8), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 4) { + RealType t = x - 2; + + // Rational Approximation + // Maximum Relative Error: 1.1494e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13928162275383718405630406427822960090e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.90742307267701162395764574873947997211e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.13821826367941514387521090205756466068e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.96186879146063565484800486550739025293e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19438785955706463753454881511977831603e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.44969124820016994689518539612465708536e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27841835070651018079759230944461773079e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.69952429132675045239242077293594666305e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47919853099168659881487026035933933068e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.04644774117864306055402364094681541437e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.52604718870921084048756263996119841957e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.97610950633031564892821158058978809537e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35934159016861180185992558083703785765e-11), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21044098237798939057079316997065892072e-14), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.94437702178976797218081686254875998984e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.82068837586514484653828718675654460991e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.87606058269189306593797764456467061128e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39130528408903116343256483948950693356e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.52792074489091396425713962375223436022e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00892233011840867583848470677898363716e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.53717105060592851173320646706141911461e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57293857675930200001382624769341451561e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04219251796696135508847408131139677925e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.20053006131133304932740325113068767057e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.26384707028090985155079342718673255493e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.19146442700994823924806249608315505708e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.59879298772002950043508762057850408213e-12), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 8) { + RealType t = x - 4; + + // Rational Approximation + // Maximum Relative Error: 1.9710e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63772802979087199762340235165979751298e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.31941534705372320785274994658709390116e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43643960022585762678456016437621064500e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.11415302325466272779041471612529728187e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15767742459744253874067896740220951622e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74049309186016489825053763513176160256e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76349898574685150849080543168157785281e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19757202370729036627932327405149840205e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.32067727965321839898287320520750897894e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47636340015260789807543414080472136575e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36236727340568181129875213546468908164e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.89379573960280486883733996547662506245e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.71832232038263988173042637335112603365e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72380245500539326441037770757072641975e-18), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.51702400281458104713682413542736419584e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01375164846907815766683647295932603968e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.92796007869834847612192314006582598557e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.77580441164023725582659445614058463183e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63592331843149724480258804892989851727e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.87334158717610115008450674967492650941e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46596056941432875244263245821845070102e-4), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.69980051560936361597177347949112822752e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.61690034211585843423761830218320365457e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.40619773800285766355596852314940341504e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.10624533319804091814643828283820958419e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.10009654621246392691126133176423833259e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.48070591106986983088640496621926852293e-16), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 16) { + RealType t = x - 8; + + // Rational Approximation + // Maximum Relative Error: 5.2049e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22610122564874286614786819620499101143e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.30352481858382230273216195795534959290e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.45933050053542949214164590814846222512e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.18776888646200567321599584635465632591e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.53421996228923143480455729204878676265e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.26075686557831306993734433164305349875e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.58045762501721375879877727645933749122e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.13469629033419341069106781092024950086e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09157226556088521407323375433512662525e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44961350323527660188267669752380722085e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11052101325523147964890915835024505324e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.26421404354976214191891992583151033361e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.17133505681224996657291059553060754343e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41010917905686427164414364663355769988e-22), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.31062773739451672808456319166347015167e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35386721434011881226168110614121649232e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.39357338312443465616015226804775178232e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.26630144036271792027494677957363535353e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.24340476859846183414651435036807677467e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.33917136421389571662908749253850939876e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.80972141456523767244381195690041498939e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22653625120465488656616983786525028119e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.64072452032620505897896978124863889812e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.61842001579321492488462230987972104386e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96631619425501661980194304605724632777e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.80392324086028812772385536034034039168e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.34254502871215949266781048808984963366e-20), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 32) { + RealType t = x - 16; + + // Rational Approximation + // Maximum Relative Error: 1.7434e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { 
+ BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.03056141356415128156562790092782153630e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.15331583242023443256381237551843296356e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.81847913073640285776566199343276995613e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.23578443960486030170636772457627141406e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36906354316016270165240908809929957836e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.80584421020238085239890207672296651219e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.20726437845755296397071540583729544203e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.71042563703818585243207722641746283288e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.08307373360265947158569900625482137206e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.80776566500233755365518221977875432763e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11405351639704510305055492207286172753e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21575609293568296049921888011966327905e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66081982641748223969990279975752576675e-22), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96483118060215455299182487430511998831e-26), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.77347004038951368607085827825968614455e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.27559305780716801070924630708599448466e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.05452382624230160738008550961679711827e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.75369667590360521677018734348769796476e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.56551290985905942229892419848093494661e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.46972102361871185271727958608184616388e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.72423917010499649257775199140781647069e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14337306905269302583746182007852069459e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.69949885309711859563395555285232232606e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.00318719634300754237920041312234711548e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41139664927184402637020651515172315287e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.79013190225240505774959477465594797961e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.46536966503325413797061462062918707370e-24), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 64) { + RealType t = x - 32; + + // Rational Approximation + // Maximum Relative Error: 2.0402e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.57333571766941514095434647381791040479e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15416685251021339933358981066948923001e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.86806753164417557035166075399588122481e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.91972616817770660098405128729991574724e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10225768760715861978198010761036882002e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.05986998674039047865566990469266534338e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.59572646670205456333051888086612875871e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19347294198055585461131949159508730257e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21328285448498841418774425071549974153e-14), + 
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.19331596847283822557042655221763459728e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.36128017817576942059191451016251062072e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.35600223942735523925477855247725326228e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.14658948592500290756690769268766876322e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.86984591055448991335081550609451649866e-30),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.90112824856612652807095815199496602262e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59291524937386142936420775839969648652e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.74245361925275011235694006013677228467e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41828589449615478387532599798645159282e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.08088176420557205743676774127863572768e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.30760429417424419297000535744450830697e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.18464910867914234357511605329900284981e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.74255540513281299503596269087176674333e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.02616028440371294233330747672966435921e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.26276597941744408946918920573146445795e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.47463109867603732992337779860914933775e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.77217411888267832243050973915295217582e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.67397425207383164084527830512920206074e-28),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        RealType t = 1 / sqrt(x);
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.2612e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[9] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.98942280401432677939946059934381868476e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33908701314796522684603310107061150444e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.92120397142832495974006972404741124398e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.15463147603421962834297353867930971657e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.44488751006069172847577645328482300099e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44057582804743599116332797864164802887e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.02968018188491417839349438941039867033e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.75092244933846337077999183310087492887e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35099582728548602389917143511323566818e-8),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.34601617336219074065534356705298927390e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.82954035780824611941899463895040327299e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.70929001162671283123255408612494541378e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05508596604210030533747793197422815105e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02913299057943756875992272236063124608e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.37824426836648736125759177846682556245e-5),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t;
+    }
+
+    return result;
+}
+
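Every branch above reduces to a rational approximant P(t)/Q(t) evaluated with tools::evaluate_polynomial, which expects coefficients ordered from the constant term upward. A minimal sketch of the underlying Horner recurrence, for orientation only (Boost's actual implementation unrolls the loop for fixed coefficient counts):

    #include <cstddef>

    // Horner form: c[0] + t*(c[1] + t*(c[2] + ...)), matching the
    // constant-term-first ordering of the P[] and Q[] tables above.
    template <typename Real, std::size_t N>
    Real horner(const Real (&c)[N], Real t) {
        Real r = c[N - 1];
        for (std::size_t i = N - 1; i > 0; --i) {
            r = r * t + c[i - 1];
        }
        return r;
    }
    // A rational approximant is then horner(P, t) / horner(Q, t).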
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_cdf_imp_prec(const RealType& x, bool complement, const boost::math::integral_constant<int, 53>& tag) {
+    if (x >= 0) {
+        return complement ? saspoint5_cdf_plus_imp_prec<RealType>(x, tag) : 1 - saspoint5_cdf_plus_imp_prec<RealType>(x, tag);
+    }
+    else if (x <= 0) {
+        return complement ? 1 - saspoint5_cdf_plus_imp_prec<RealType>(-x, tag) : saspoint5_cdf_plus_imp_prec<RealType>(-x, tag);
+    }
+    else {
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_cdf_imp_prec(const RealType& x, bool complement, const boost::math::integral_constant<int, 113>& tag) {
+    if (x >= 0) {
+        return complement ? saspoint5_cdf_plus_imp_prec<RealType>(x, tag) : 1 - saspoint5_cdf_plus_imp_prec<RealType>(x, tag);
+    }
+    else if (x <= 0) {
+        return complement ? 1 - saspoint5_cdf_plus_imp_prec<RealType>(-x, tag) : saspoint5_cdf_plus_imp_prec<RealType>(-x, tag);
+    }
+    else {
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_cdf_imp(const saspoint5_distribution<RealType, Policy>& dist, const RealType& x, bool complement) {
+    //
+    // This calculates the cdf of the Saspoint5 distribution and/or its complement.
+    //
+
+    BOOST_MATH_STD_USING // for ADL of std functions
+    constexpr auto function = "boost::math::cdf(saspoint5<%1%>&, %1%)";
+    RealType result = 0;
+    RealType location = dist.location();
+    RealType scale = dist.scale();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_x(function, x, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The SaS point5 distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128-bit quad-floats)");
+
+    RealType u = (x - location) / scale;
+
+    result = saspoint5_cdf_imp_prec<RealType>(u, complement, tag_type());
+
+    return result;
+}
+
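A corresponding usage sketch for the CDF wrapper above. Again, this is not part of the patch; it assumes the standard Boost.Math non-member cdf()/complement() interface:

    #include <boost/math/distributions/saspoint5.hpp>
    #include <iostream>

    int main() {
        boost::math::saspoint5_distribution<double> dist(0.0, 1.0);
        double x = 2.0;
        double F = boost::math::cdf(dist, x);                          // P(X <= x)
        // The complemented form routes through saspoint5_cdf_imp with
        // complement == true, avoiding cancellation in 1 - F for large x.
        double S = boost::math::cdf(boost::math::complement(dist, x)); // P(X > x)
        std::cout << F << ' ' << S << '\n';
    }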
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_quantile_upper_imp_prec(const RealType& p, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (ilogb(p) >= -2) {
+        RealType u = -log2(ldexp(p, 1));
+
+        if (u < 0.125) {
+            // Rational Approximation
+            // Maximum Relative Error: 4.2616e-17
+            BOOST_MATH_STATIC const RealType P[13] = {
+                static_cast<RealType>(1.36099130643975127045e-1),
+                static_cast<RealType>(2.19634434498311523885e1),
+                static_cast<RealType>(1.70276954848343179287e3),
+                static_cast<RealType>(8.02187341786354339306e4),
+                static_cast<RealType>(2.48750112198456813443e6),
+                static_cast<RealType>(5.20617858300443231437e7),
+                static_cast<RealType>(7.31202030685167303439e8),
+                static_cast<RealType>(6.66061403138355591915e9),
+                static_cast<RealType>(3.65687892725590813998e10),
+                static_cast<RealType>(1.06061776220305595494e11),
+                static_cast<RealType>(1.23930642673461465346e11),
+                static_cast<RealType>(1.49986408149520127078e10),
+                static_cast<RealType>(-6.17325587219357123900e8),
+            };
+            BOOST_MATH_STATIC const RealType Q[13] = {
+                static_cast<RealType>(1.),
+                static_cast<RealType>(1.63111146753825227716e2),
+                static_cast<RealType>(1.27864461509685444043e4),
+                static_cast<RealType>(6.10371533241799228037e5),
+                static_cast<RealType>(1.92422115963507708309e7),
+                static_cast<RealType>(4.11544185502250709497e8),
+                static_cast<RealType>(5.95343302992055062258e9),
+                static_cast<RealType>(5.65615858889758369947e10),
+                static_cast<RealType>(3.30833154992293143503e11),
+                static_cast<RealType>(1.06032392136054207216e12),
+                static_cast<RealType>(1.50071282012095447931e12),
+                static_cast<RealType>(5.43552396263989180433e11),
+                static_cast<RealType>(9.57434915768660935004e10),
+            };
+
+            result = u * tools::evaluate_polynomial(P, u) / (tools::evaluate_polynomial(Q, u) * (p * p));
+        }
+        else if (u < 0.25) {
+            RealType t = u - static_cast<RealType>(0.125);
+
+            // Rational Approximation
+            // Maximum Relative Error: 2.3770e-19
+            BOOST_MATH_STATIC const RealType P[8] = {
+                static_cast<RealType>(1.46698650748920243698e-2),
+                static_cast<RealType>(3.58380131788385557227e-1),
+                static_cast<RealType>(3.39153750029553194566e0),
+                static_cast<RealType>(1.55457424873957272207e1),
+                static_cast<RealType>(3.44403897039657057261e1),
+                static_cast<RealType>(3.01881531964962975320e1),
+                static_cast<RealType>(2.77679052294606319767e0),
+                static_cast<RealType>(-7.76665288232972435969e-2),
+            };
+            BOOST_MATH_STATIC const RealType Q[7] = {
+                static_cast<RealType>(1.),
+                static_cast<RealType>(1.72584280323876188464e1),
+                static_cast<RealType>(1.11983518800147654866e2),
+                static_cast<RealType>(3.25969893054048132145e2),
+                static_cast<RealType>(3.91978809680672051666e2),
+                static_cast<RealType>(1.29874252720714897530e2),
+                static_cast<RealType>(2.08740114519610102248e1),
+            };
+
+            result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+        }
+        else if (u < 0.5) {
+            RealType t = u - static_cast<RealType>(0.25);
+
+            // Rational Approximation
+            // Maximum Relative Error: 9.2445e-18
+            BOOST_MATH_STATIC const RealType P[8] = {
+                static_cast<RealType>(2.69627866689346445458e-2),
+                static_cast<RealType>(3.23091180507445216811e-1),
+                static_cast<RealType>(1.42164019533549860681e0),
+                static_cast<RealType>(2.74613170828120023406e0),
+                static_cast<RealType>(2.07865023346180997996e0),
+                static_cast<RealType>(2.53267176863740856907e-1),
+                static_cast<RealType>(-2.55816250186301841152e-2),
+                static_cast<RealType>(3.02683750470398342224e-3),
+            };
+            BOOST_MATH_STATIC const RealType Q[6] = {
+                static_cast<RealType>(1.),
+                static_cast<RealType>(8.55049920135376003042e0),
+                static_cast<RealType>(2.48726119139047911316e1),
+                static_cast<RealType>(2.79519589592198994574e1),
+                static_cast<RealType>(9.88212916161823866098e0),
+                static_cast<RealType>(1.39749417956251951564e0),
+            };
+
+            result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+        }
+        else {
+            RealType t = u - static_cast<RealType>(0.5);
+
+            // Rational Approximation
+            // Maximum Relative Error: 2.2918e-20
+            BOOST_MATH_STATIC const RealType P[9] = {
+                static_cast<RealType>(4.79518653373241051274e-2),
+                static_cast<RealType>(3.81837125793765918564e-1),
+                static_cast<RealType>(1.13370353708146321188e0),
+                static_cast<RealType>(1.55218145762186846509e0),
+                static_cast<RealType>(9.60938271141036509605e-1),
+                static_cast<RealType>(2.11811755464425606950e-1),
+                static_cast<RealType>(8.84533960603915742831e-3),
+                static_cast<RealType>(1.73314614571009160225e-3),
+                static_cast<RealType>(-3.63491208733876986098e-5),
+            };
+            BOOST_MATH_STATIC const RealType Q[8] = {
+                static_cast<RealType>(1.),
+                static_cast<RealType>(6.36954463000253710936e0),
+                static_cast<RealType>(1.40601897306833147611e1),
+                static_cast<RealType>(1.33838075106916667084e1),
+                static_cast<RealType>(5.60958095533108032859e0),
+                static_cast<RealType>(1.11796035623375210182e0),
+                static_cast<RealType>(1.12508482637488861060e-1),
+                static_cast<RealType>(5.18503975949799718538e-3),
+            };
+
+            result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+        }
+    }
+    else if (ilogb(p) >= -4) {
+        RealType t = -log2(ldexp(p, 2));
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.2057e-18
+        BOOST_MATH_STATIC const RealType P[10] = {
+            static_cast<RealType>(8.02395484493329835881e-2),
+            static_cast<RealType>(2.46132933068351274622e-1),
+            static_cast<RealType>(2.81820176867119231101e-1),
+            static_cast<RealType>(1.47754061028371025893e-1),
+            static_cast<RealType>(3.54638964490281023406e-2),
+            static_cast<RealType>(3.99998730093393774294e-3),
+            static_cast<RealType>(3.81581928434827040262e-4),
+            static_cast<RealType>(1.82520920154354221101e-5),
+    else if (ilogb(p) >= -4) {
+        RealType t = -log2(ldexp(p, 2));
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.2057e-18
+        BOOST_MATH_STATIC const RealType P[10] = {
+            static_cast<RealType>(8.02395484493329835881e-2),
+            static_cast<RealType>(2.46132933068351274622e-1),
+            static_cast<RealType>(2.81820176867119231101e-1),
+            static_cast<RealType>(1.47754061028371025893e-1),
+            static_cast<RealType>(3.54638964490281023406e-2),
+            static_cast<RealType>(3.99998730093393774294e-3),
+            static_cast<RealType>(3.81581928434827040262e-4),
+            static_cast<RealType>(1.82520920154354221101e-5),
+            static_cast<RealType>(-2.06151396745690348445e-7),
+            static_cast<RealType>(6.77986548138011345849e-9),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(2.39244329037830026691e0),
+            static_cast<RealType>(2.12683465416376620896e0),
+            static_cast<RealType>(9.02612272334554457823e-1),
+            static_cast<RealType>(2.06667959191488815314e-1),
+            static_cast<RealType>(2.79328968525257867541e-2),
+            static_cast<RealType>(2.28216286216537879937e-3),
+            static_cast<RealType>(1.04195690531437767679e-4),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+    }
+    else if (ilogb(p) >= -8) {
+        RealType t = -log2(ldexp(p, 4));
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.3944e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(1.39293493266195561875e-1),
+            static_cast<RealType>(1.26741380938661691592e-1),
+            static_cast<RealType>(4.31117040307200265931e-2),
+            static_cast<RealType>(7.50528269269498076949e-3),
+            static_cast<RealType>(8.63100497178570310436e-4),
+            static_cast<RealType>(6.75686286034521991703e-5),
+            static_cast<RealType>(3.11102625473120771882e-6),
+            static_cast<RealType>(9.63513655399980075083e-8),
+            static_cast<RealType>(-6.40223609013005302318e-11),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(8.11234548272888947555e-1),
+            static_cast<RealType>(2.63525516991753831892e-1),
+            static_cast<RealType>(4.77118226533147280522e-2),
+            static_cast<RealType>(5.46090741266888954909e-3),
+            static_cast<RealType>(4.15325425646862026425e-4),
+            static_cast<RealType>(2.02377681998442384863e-5),
+            static_cast<RealType>(5.79823311154876056655e-7),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+    }
+    else if (ilogb(p) >= -16) {
+        RealType t = -log2(ldexp(p, 8));
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.1544e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(1.57911660613037760235e-1),
+            static_cast<RealType>(5.59740955695099219682e-2),
+            static_cast<RealType>(8.92895854008560399142e-3),
+            static_cast<RealType>(8.88795299273855801726e-4),
+            static_cast<RealType>(5.66358335596607738071e-5),
+            static_cast<RealType>(2.46733195253941569922e-6),
+            static_cast<RealType>(6.44829870181825872501e-8),
+            static_cast<RealType>(7.62193242864380357931e-10),
+            static_cast<RealType>(-7.82035413331699873450e-14),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(3.49007782566002620811e-1),
+            static_cast<RealType>(5.65303702876260444572e-2),
+            static_cast<RealType>(5.54316442661801299351e-3),
+            static_cast<RealType>(3.58498995501703237922e-4),
+            static_cast<RealType>(1.53872913968336341278e-5),
+            static_cast<RealType>(4.08512152326482573624e-7),
+            static_cast<RealType>(4.72959615756470826429e-9),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+    }
+    else if (ilogb(p) >= -32) {
+        RealType t = -log2(ldexp(p, 16));
+
+        // Rational Approximation
+        // Maximum Relative Error: 8.5877e-18
+        BOOST_MATH_STATIC const RealType P[10] = {
+            static_cast<RealType>(1.59150086070234563099e-1),
+            static_cast<RealType>(6.07144002506911115092e-2),
+            static_cast<RealType>(1.10026443723891740392e-2),
+            static_cast<RealType>(1.24892739209332398698e-3),
+            static_cast<RealType>(9.82922518655171276487e-5),
+            static_cast<RealType>(5.58366837526347222893e-6),
+            static_cast<RealType>(2.29005408647580194007e-7),
+            static_cast<RealType>(6.44325718317518336404e-9),
+            static_cast<RealType>(1.05110361316230054467e-10),
+            static_cast<RealType>(1.48083450629432857655e-18),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(3.81470315977341203351e-1),
+            static_cast<RealType>(6.91330250512167919573e-2),
+            static_cast<RealType>(7.84712209182587717077e-3),
+            static_cast<RealType>(6.17595479676821181012e-4),
+            static_cast<RealType>(3.50829361179041199953e-5),
+            static_cast<RealType>(1.43889153071571504712e-6),
+            static_cast<RealType>(4.04840254888235877998e-8),
+            static_cast<RealType>(6.60429636407045050112e-10),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+    }
+    else if (ilogb(p) >= -64) {
+        RealType t = -log2(ldexp(p, 32));
+
+        // Rational Approximation
+        // Maximum Relative Error: 8.7254e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(1.59154943017783026201e-1),
+            static_cast<RealType>(6.91506515614472069475e-2),
+            static_cast<RealType>(1.44590186111155933843e-2),
+            static_cast<RealType>(1.92616138327724025421e-3),
+            static_cast<RealType>(1.79640147906775699469e-4),
+            static_cast<RealType>(1.30852535070639833809e-5),
+            static_cast<RealType>(5.55259657884038297268e-7),
+            static_cast<RealType>(3.50107118687544980820e-8),
+            static_cast<RealType>(-1.47102592933729597720e-22),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(4.34486357752330500669e-1),
+            static_cast<RealType>(9.08486933075320995164e-2),
+            static_cast<RealType>(1.21024289017243304241e-2),
+            static_cast<RealType>(1.12871233794777525784e-3),
+            static_cast<RealType>(8.22170725751776749123e-5),
+            static_cast<RealType>(3.48879932410650101194e-6),
+            static_cast<RealType>(2.19978790407451988423e-7),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+    }
+    else {
+        result = 1 / (p * p * constants::two_pi<RealType>());
+    }
+
+    return result;
+}
+
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_quantile_upper_imp_prec(const RealType& p, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (ilogb(p) >= -2) {
+        RealType u = -log2(ldexp(p, 1));
+
+        if (u < 0.125) {
+            // Rational Approximation
+            // Maximum Relative Error: 2.5675e-36
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[31] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36099130643975133156293056139850872219e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.03940482189350763127508703926866548690e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.00518276893354880480781640750482315271e4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.55844903094077096941027360107304259099e6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04507684135310729583474324660276395831e9),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28519957085041757616278379578781441623e11),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26054173986187219679917530171252145632e13),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00693075272502479915569708465960917906e15),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.64153695410984136395853200311209462775e16),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.64993034609287363745840801813540992383e18),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.68080300629977787949474098413155901197e20),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.50632142671665246974634799849090331338e21),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11943753054362349397013211631038480307e23),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.80601829873419334580289886671478701625e24),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33441650581633426542372642262736818512e26),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.56279427934163518272441555879970370340e27),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.08899113985387092689705022477814364717e28),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.37750989391907347952902900750138805007e29),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.76961267256299304213687639380275530721e30),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.98417586455955659885944915688130612888e31),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.40932923796679251232655132670811114351e32),
BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.80810239916688876216017180714744912573e33), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.23907429566810200929293428832485038147e33), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11441754640405256305951569489818422227e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.30534222360394829628175800718529342304e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.73301799323855143458670230536670073483e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.53142592196246595846485130434777396548e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.81719621726393542967303806360105998384e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.00188544550531824809437206713326495544e33), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.62706943144847786115732327787879709587e32), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.32129438774563059735783287456769609571e31), + }; + BOOST_MATH_STATIC const RealType Q[32] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.65910866673514847742559406762379054364e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21954860438789969160116317316418373146e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.85684385746348850219351196129081986508e7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.76131116920014625994371306210585646224e9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.57402411617965582839975369786525269977e11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.42213951996062253608905591667405322835e13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.55477693883842522631954327528060778834e15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.00397907346473927493255003955380711046e17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.76305959503723486331556274939198109922e19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27926540483498824808520492399128682366e21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.98253913105291675445666919447864520248e22), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63457445658532249936389003141915626894e24), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.51446616633910582673057455450707805902e25), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04744823698010333311911891992022528040e27), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.03400927415310540137351756981742318263e28), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.28761940359662123632247441327784689568e29), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.39016138777648624292953560568071708327e30), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.79639567867465767764785448609833337532e31), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.23406781975678544311073661662680006588e32), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.97261483656310352862554580475760827374e33), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62715040832592600542933595577003951697e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.77359945057399130202830211722221279906e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.07842295432910751940058270741081867701e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.51739306780247334064265249344359460675e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.60574331076505049588401700048488577194e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.08286808700840316336961663635580879141e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.27661033115008662284071342245200272702e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00465576791024249023365007797010262700e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83311248273885136105510175099322638440e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
1.96635220211386288597285960837372073054e33),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23849744128418288892902205619933047730e32),
+            };
+            // LCOV_EXCL_STOP
+            result = u * tools::evaluate_polynomial(P, u) / (tools::evaluate_polynomial(Q, u) * (p * p));
+        }
+        else if (u < 0.25) {
+            RealType t = u - static_cast<RealType>(0.125);
+
+            // Rational Approximation
+            // Maximum Relative Error: 9.0663e-36
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[13] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46698650748920243663487731226111319705e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.39021286045890143123252180276484388346e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21933242816562043224009451007344301143e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33741547463966207206741888477702151242e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.29556944160837955334643715180923663741e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.25261081330476435844217173674285740857e3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28690563577245995896389783271544510833e4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.51764495004238264050843085122188741180e4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.00501773552098137637598813101153206656e4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.93991776883375928647775429233323885440e4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.13059418708769178567954713937745050279e3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41565791250614170744069436181282300453e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.35838723365672196069179944509778281549e1),
+            };
+            BOOST_MATH_STATIC const RealType Q[13] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.63888803456697300467924455320638435538e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.74785179836182339383932806919167693991e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.15133301804008879476562749311747788645e3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.87361675398393057971764841741518474061e4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02992617475892211368309739891693879676e5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36356854400440662641546588001882412251e5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.35807552915245783626759227539698719908e5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75959389290929178190646034566377062463e5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18514088996371641206828142820042918681e5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.54881978220293930450469794941944831047e4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.83958740186543542804045767758191509433e3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26637084978098507405883170227585648985e2),
+            };
+            // LCOV_EXCL_STOP
+            result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+        }
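+        // (Editor's note.) The coefficient tables in this header read like
+        // machine-generated minimax rational fits -- the quoted "Maximum
+        // Relative Error" lines being the fit residuals, presumably from a
+        // Remez-style optimisation -- which is why the LCOV_EXCL markers
+        // exclude the generated tables from coverage accounting.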
+        else if (u < 0.5) {
+            RealType t = u - static_cast<RealType>(0.25);
+
+            // Rational Approximation
+            // Maximum Relative Error: 7.1265e-35
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[14] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.69627866689346442965083437425920959525e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.28948812330446670380449765578224539665e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.38832694133021352110245148952631526683e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.70206624753427831733487031852769976576e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34677850226082773550206949299306677736e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.18657422004942861459539366963056149110e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.90933843076824719761937043667767333536e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.78597771586582252472927601403235921029e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.76489020985978559079198751910122765603e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.37662018494780327201390375334403954354e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11303058491765900888068268844399186476e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.38147649159947518976483710606042789880e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.81260575060831053615857196033574207714e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.26783311530618626413866321968979725353e-3),
+            };
+            BOOST_MATH_STATIC const RealType Q[13] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.98941943311823528497840052715295329781e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70142252619301982454969690308614487433e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.17472255695869018956165466705137979540e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.42016169942136311355803413981032780219e3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.55874385736597452997483327962434131932e3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.45782883079400958761816030672202996788e3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.05272877129840019671123017296056938361e3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.79833037593794381103412381177370862105e3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.67388248713896792948592889733513376054e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.19952164110429183557842014635391021832e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.43813483967503071358907030110791934870e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.21327682641358836049127780506729428797e-1),
+            };
+            // LCOV_EXCL_STOP
+            result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+        }
+        else {
+            RealType t = u - static_cast<RealType>(0.5);
+
+            // Rational Approximation
+            // Maximum Relative Error: 2.7048e-37
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[15] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.79518653373241051262822702930040975338e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.62230291299220868262265687829866364204e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.87315544620612697712513318458226575394e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.38993950875334507399211313740958438201e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.54257654902026056547861805085572437922e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85673656862223617197701693270067722169e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47842193222521213922734312546590337064e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.76640627287007744941009407221495229316e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.71884893887802925773271837595143776207e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.87432154629995817972739015224205530101e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44664933176248007092868241686074743562e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.32094739938150047092982705610586287965e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.18537678581395571564129512698022192316e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99365265557355974918712592061740510276e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.66467016868206844419002547523627548705e-6),
+            };
+            BOOST_MATH_STATIC const RealType Q[15] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+                BOOST_MATH_BIG_CONSTANT(RealType,
113, 1.01315080955831561204744043759079263546e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.43409077070585581955481063438385546913e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09863540097812452102765922256432103612e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.69971336507400724019217277303598318934e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71444880426858110981683485927452024652e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15252748520663939799185721687082682973e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.28399989835264172624148638350889215004e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.73464700365199500083227290575797895127e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.40421770918884020099427978511354197438e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.97023025282119988988976542004620759235e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.38774609088015115009880504176630591783e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52748138528630655371589047000668876440e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13088455793478303045390386135591069087e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.18220605549460262119565543089703387122e-5), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p)); + } + } + else if (ilogb(p) >= -4) { + RealType t = -log2(ldexp(p, 2)); + + // Rational Approximation + // Maximum Relative Error: 3.8969e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.02395484493329839255216366819344305871e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.02703992140456336967688958960484716694e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.38779662796374026809611637926067177436e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22326903547451397450399124548020897393e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29321119874906326000117036864856138032e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60045794013093831332658415095234082115e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.75863216252160126657107771372004587438e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65658718311497180532644775193008407069e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.53225259384404343896446164609240157391e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17876243295156782920260122855798305258e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59647007234516896762020830535717539733e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.64519789656979327339865975091579252352e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29566724776730544346201080459027524931e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.40979492647851412567441418477263395917e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78894057948338305679452471174923939381e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.97064244496171921075006182915678263370e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13496588267213644899739513941375650458e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.75224691413667093006312591320754720811e-14), + }; + BOOST_MATH_STATIC const RealType Q[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.59000688626663121310675150262772434285e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37518060227321498297232252379976917550e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96559443266702775026538144474892076437e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.81865210018244220041408788510705356696e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
1.15188291931842064325756652570456168425e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.16909307081950035111952362482113369939e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.68454509269150307761046136063890222011e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.06391236761753712424925832120306727169e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.35744804731044427608283991933125506859e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00655928177646208520006978937806043639e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04093230988242553633939757013466501271e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.07921031269974885975846184199640060403e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.61356596082773699708092475561216104426e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83968520269928804453766899533464507543e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.44620973323561344735660659502096499899e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.84398925760354259350870730551452956164e-11), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p)); + } + else if (ilogb(p) >= -8) { + RealType t = -log2(ldexp(p, 4)); + + // Rational Approximation + // Maximum Relative Error: 4.0176e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39293493266195566603513288406748830312e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75724665658983779947977436518056682748e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.42437549740894393207094008058345312893e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26189619865771499663660627120168211026e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.38952430871711360962228087792821341859e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09604487371653920602809626594722822237e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06215021409396534038209460967790566899e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.02245531075243838209245241246011523536e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.52822482024384335373072062232322682354e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.32527687997718638700761890588399465467e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.54799997015944073019842889902521208940e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59368565314052950335981455903474908073e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.09459594346367728583560281313278117879e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.30296867679720593932307487485758431355e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04594079707862644415224596859620253913e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.40274507498190913768918372242285652373e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.48644117815971872777609922455371868747e-16), + }; + BOOST_MATH_STATIC const RealType Q[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88079839671202113888025645668230104601e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.58898753182105924446845274197682915131e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.05418719178760837974322764299800701708e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.76439568495464423890950166804368135632e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.87661284201828717694596419805804620767e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29462021166220769918154388930589492957e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.89960014717788045459266868996575581278e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
2.21759236630028632465777310665839652757e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.08884467282860764261728614542418632608e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60098870889198704716300891829788260654e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00120123451682223443624210304146589040e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.08868117923724451329261971335574401646e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.07346130275947166224129347124306950150e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.57848230665832873347797099944091265220e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50841502849442327828534131901583916707e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19165038770000448560339443014882434202e-15), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p)); + } + else if (ilogb(p) >= -16) { + RealType t = -log2(ldexp(p, 8)); + + // Rational Approximation + // Maximum Relative Error: 4.1682e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57911660613037766795694241662819364797e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28799302413396670477035614399187456630e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.87488304496324715063356722168914018093e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15106082041721012436439208357739139578e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.91744691940169259573871742836817806248e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.40707390548486625606656777332664791183e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.37047148097688601398129659532643297674e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88039545021930711122085375901243257574e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22254460725736448552173288004145978774e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.58462349007293730244197837509157696852e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.95242372547984999431208546685672497090e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10113734998651793201123616276573169622e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.38963677413425618019569452771868834246e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.45242599273032563942546507899265865936e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.64855118157117311049698715635863670233e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.31679318790012894619592273346600264199e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.97289727214495789126072009268721022605e-20), + }; + BOOST_MATH_STATIC const RealType Q[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.10184661848812835285809771940181522329e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.06179300560230499194426573196970342618e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.23171547302923911058112454487643162794e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20486436116678834807354529081908850425e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.51239574861351183874145649960640500707e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48939385253081273966380467344920741615e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18148716720470800170115047757600735127e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.68156131480770927662478944117713742978e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.13720275846166334505537351224097058812e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85505701632948614345319635028225905820e-9), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.91876669388212587242659571229471930880e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.12971661051277278610784329698988278013e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.31096179726750865531615367639563072055e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.03579138802970748888093188937926461893e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45570688568663643410924100311054014175e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23959804461200982866930072222355142173e-19), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p)); + } + else if (ilogb(p) >= -32) { + RealType t = -log2(ldexp(p, 16)); + + // Rational Approximation + // Maximum Relative Error: 6.2158e-37 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59150086070234561732507586188017224084e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.80849532387385837583114307010320459997e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41158479406270598752210344238285334672e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88824037165656723581890282427897772492e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.82912940787568736176030025420621547622e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36469458704261637785603215754389736108e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.13801486421774537025334682673091205328e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.97058432407176502984043208925327069250e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60823277541385163663463406307766614689e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.45016369260792040947272022706860047646e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.54846457278644736871929319230398689553e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.67291749890916930953794688556299297735e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.18742803398417392282841454979723852423e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13337431668170547244474715030235433597e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37782648734897338547414800391203459036e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17863064141234633971470839644872485483e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.58768205048500915346781559321978174829e-24), + }; + BOOST_MATH_STATIC const RealType Q[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.27782279546086824129750042200649907991e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.86934772625607907724733810228981095894e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18640923531164938140838239032346416143e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14927915778694317602192656254187608215e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.57462121236985574785071163024761935943e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.11326475640883361512750692176665937785e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.49479333407835032831192117009600622344e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01048164044583907219965175201239136609e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.42444147056333448589159611785359705792e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.72928365136507710372724683325279209464e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30776323450676114149657959931149982200e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.51599272091669693373558919762006698549e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
7.12120236145539526122748260176385899774e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.65713807525694136400636427188839379484e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.40555520515542383952495965093730818381e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.51084407433793180162386990118245623958e-23), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p)); + } + else if (ilogb(p) >= -64) { + RealType t = -log2(ldexp(p, 32)); + + // Rational Approximation + // Maximum Relative Error: 9.8515e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59154943017783040087729009335921759322e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.35955784629344586058432079844665517425e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24333525582177610783141409282489279582e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.58257137499954581519132407255793210808e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47191495695958634792434622715063010854e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06408464185207904662485396901099847317e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.20796977470988464880970001894205834196e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.99451680244976178843047944033382023574e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21331607817814211329055723244764031561e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.16997758215752306644496702331954449485e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22151810180865778439184946086488092970e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.05017329554372903197056366604190738772e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.45919279055502465343977575104142733356e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14611865933281087898817644094411667861e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66574579315129285098834562564888533591e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.90098275536617376789480602467351545227e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.56200324658873566425094389271790730206e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.35526648761411463124801128103381691418e-26), + }; + BOOST_MATH_STATIC const RealType Q[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.99582804063194774835771688139366152937e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.81210581320456331960539046132284190053e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.94358920922810097599951120081974275145e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.24831443209858422294319043037419780210e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.68584098673893150929178892200446909375e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.90058244779420124535106788512940199547e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88151039746201934320258158884886191886e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.62348975553160355852344937226490493460e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.62007418751593938350474825754731470453e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.67502458979132962529935588245058477825e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.91648040348401277706598232576212305626e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.05843052379331618504561151714467880641e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.20127592059771206959014911028588129920e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04661894930286305556240859086772458465e-18), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19442269177165740287568170417649762849e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.81435584873372180820418114652670864136e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.62145023253666168339801687459484937001e-25), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p)); + } + else if (ilogb(p) >= -128) { + RealType t = -log2(ldexp(p, 64)); + + // Rational Approximation + // Maximum Relative Error: 2.2157e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59154943091895335751628149866310390641e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.91164927854420277537616294413463565970e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47557801928232619499125670863084398577e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06172621625221091203249391660455847328e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11720157411653968975956625234656001375e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70086412127379161257840749700428137407e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11069186177775505692019195793079552937e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.04581765901792649215653828121992908775e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78996797234624395264657873201296117159e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10365978021268853654282661591051834468e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.76744621013787434243259445839624450867e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11110170303355425599446515240949433934e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.83669090335022069229153919882930282425e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.09633460833089193733622172621696983652e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16200852052266861122422190933586966917e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47795810090424252745150042033544310609e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35722092370326505616747155207965300634e-22), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.98381676423023212724768510437325359364e-51), + }; + BOOST_MATH_STATIC const RealType Q[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.34271731953273239691423485699928257808e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.27133013035186849140772481980360839559e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29542078693828543560388747333519393752e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33027698228265344561650492600955601983e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06868444562964057795387778972002636261e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.97868278672593071151212650783507879919e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.79869926850283188785885503049178903204e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.75298857713475428388051708713106549590e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.93449891515741631942400181171061740767e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36715626731277089044713008494971829270e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.98125789528264426960496891930942311971e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78234546049400950544724588355539821858e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.83044000387150792693434128054414524740e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.30111486296552039483196431122555524886e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
9.28628462422858135083238952377446358986e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48108558735886480298604270981393793162e-21),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+    }
+    else {
+        result = 1 / (p * p * constants::two_pi<RealType>());
+    }
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_quantile_imp_prec(const RealType& p, bool complement, const boost::math::integral_constant<int, 53>& tag)
+{
+    if (p > 0.5) {
+        return !complement ? saspoint5_quantile_upper_imp_prec(1 - p, tag) : -saspoint5_quantile_upper_imp_prec(1 - p, tag);
+    }
+
+    return complement ? saspoint5_quantile_upper_imp_prec(p, tag) : -saspoint5_quantile_upper_imp_prec(p, tag);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_quantile_imp_prec(const RealType& p, bool complement, const boost::math::integral_constant<int, 113>& tag)
+{
+    if (p > 0.5) {
+        return !complement ? saspoint5_quantile_upper_imp_prec(1 - p, tag) : -saspoint5_quantile_upper_imp_prec(1 - p, tag);
+    }
+
+    return complement ? saspoint5_quantile_upper_imp_prec(p, tag) : -saspoint5_quantile_upper_imp_prec(p, tag);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_quantile_imp(const saspoint5_distribution<RealType, Policy>& dist, const RealType& p, bool complement)
+{
+    // This routine implements the quantile for the Saspoint5 distribution,
+    // the value p may be the probability, or its complement if complement=true.
+
+    constexpr auto function = "boost::math::quantile(saspoint5<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+    RealType location = dist.location();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_probability(function, p, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The SaS point5 distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    result = location + scale * saspoint5_quantile_imp_prec(p, complement, tag_type());
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_entropy_imp_prec(const boost::math::integral_constant<int, 53>&)
+{
+    return static_cast<RealType>(3.63992444568030649573);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_entropy_imp_prec(const boost::math::integral_constant<int, 113>&)
+{
+    return BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.6399244456803064957308496039071853510);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_entropy_imp(const saspoint5_distribution<RealType, Policy>& dist)
+{
+    // This implements the entropy for the Saspoint5 distribution,
+
+    constexpr auto function = "boost::math::entropy(saspoint5<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The SaS point5 distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    result = saspoint5_entropy_imp_prec<RealType>(tag_type()) + log(scale);
+
+    return result;
+}
+
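+// (Editor's illustrative note, not part of the patch.) Since the entropy
+// implementation above is a fixed constant plus log(scale), the location
+// parameter never matters:
+//
+//     boost::math::saspoint5<> d(5.0, 2.0);   // location 5, scale 2
+//     double h = boost::math::entropy(d);     // == 3.63992444568... + log(2.0)
+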
+} // detail
+
+template <class RealType = double, class Policy = policies::policy<> >
+class saspoint5_distribution
+{
+    public:
+    typedef RealType value_type;
+    typedef Policy policy_type;
+
+    BOOST_MATH_GPU_ENABLED saspoint5_distribution(RealType l_location = 0, RealType l_scale = 1)
+        : mu(l_location), c(l_scale)
+    {
+        constexpr auto function = "boost::math::saspoint5_distribution<%1%>::saspoint5_distribution";
+        RealType result = 0;
+        detail::check_location(function, l_location, &result, Policy());
+        detail::check_scale(function, l_scale, &result, Policy());
+    } // saspoint5_distribution
+
+    BOOST_MATH_GPU_ENABLED RealType location()const
+    {
+        return mu;
+    }
+    BOOST_MATH_GPU_ENABLED RealType scale()const
+    {
+        return c;
+    }
+
+    private:
+    RealType mu; // The location parameter.
+    RealType c;  // The scale parameter.
+};
+
+typedef saspoint5_distribution<double> saspoint5;
+
+#ifdef __cpp_deduction_guides
+template <class RealType>
+saspoint5_distribution(RealType) -> saspoint5_distribution<typename boost::math::tools::promote_args<RealType>::type>;
+template <class RealType>
+saspoint5_distribution(RealType, RealType) -> saspoint5_distribution<typename boost::math::tools::promote_args<RealType>::type>;
+#endif
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const saspoint5_distribution<RealType, Policy>&)
+{ // Range of permissible values for random variable x.
+    BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
+    {
+        return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
+    }
+    else
+    { // Can only use max_value.
+        using boost::math::tools::max_value;
+        return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max.
+    }
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const saspoint5_distribution<RealType, Policy>&)
+{ // Range of supported values for random variable x.
+    // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
+    BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
+    {
+        return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
+    }
+    else
+    { // Can only use max_value.
+        using boost::math::tools::max_value;
+        return boost::math::pair<RealType, RealType>(-tools::max_value<RealType>(), max_value<RealType>()); // - to + max.
+    }
+}
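+// (Editor's note.) As with the Cauchy distribution, the support is the whole
+// real line, so for types with an infinity both bounds are infinite:
+//
+//     auto s = boost::math::support(boost::math::saspoint5<>());
+//     // s.first == -infinity, s.second == +infinity
+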
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const saspoint5_distribution<RealType, Policy>& dist, const RealType& x)
+{
+    return detail::saspoint5_pdf_imp(dist, x);
+} // pdf
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const saspoint5_distribution<RealType, Policy>& dist, const RealType& x)
+{
+    return detail::saspoint5_cdf_imp(dist, x, false);
+} // cdf
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const saspoint5_distribution<RealType, Policy>& dist, const RealType& p)
+{
+    return detail::saspoint5_quantile_imp(dist, p, false);
+} // quantile
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<saspoint5_distribution<RealType, Policy>, RealType>& c)
+{
+    return detail::saspoint5_cdf_imp(c.dist, c.param, true);
+} // cdf complement
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<saspoint5_distribution<RealType, Policy>, RealType>& c)
+{
+    return detail::saspoint5_quantile_imp(c.dist, c.param, true);
+} // quantile complement
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mean(const saspoint5_distribution<RealType, Policy> &dist)
+{
+    // There is no mean:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The SaS point5 Distribution has no mean");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::mean(saspoint5<%1%>&)",
+        "The SaS point5 distribution does not have a mean: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType variance(const saspoint5_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no variance:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The SaS point5 Distribution has no variance");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::variance(saspoint5<%1%>&)",
+        "The SaS point5 distribution does not have a variance: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mode(const saspoint5_distribution<RealType, Policy>& dist)
+{
+    return dist.location();
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType median(const saspoint5_distribution<RealType, Policy>& dist)
+{
+    return dist.location();
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const saspoint5_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no skewness:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The SaS point5 Distribution has no skewness");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::skewness(saspoint5<%1%>&)",
+        "The SaS point5 distribution does not have a skewness: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy()); // infinity?
+}
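+// (Editor's sketch, standard Boost.Math policy machinery; not part of the
+// patch.) With alpha = 1/2 < 1 the tails are too heavy for any integer moment
+// to exist, hence the static_asserts in mean()/variance()/skewness() above
+// and kurtosis() below. Callers who prefer a runtime signal can opt out of
+// the compile-time assert:
+//
+//     using quiet_policy = boost::math::policies::policy<
+//         boost::math::policies::assert_undefined<false>,
+//         boost::math::policies::domain_error<boost::math::policies::ignore_error>>;
+//     boost::math::saspoint5_distribution<double, quiet_policy> d(0.0, 1.0);
+//     double m = boost::math::mean(d); // compiles; yields a quiet NaN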
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const saspoint5_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no kurtosis:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The SaS point5 Distribution has no kurtosis");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::kurtosis(saspoint5<%1%>&)",
+        "The SaS point5 distribution does not have a kurtosis: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const saspoint5_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no kurtosis excess:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The SaS point5 Distribution has no kurtosis excess");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::kurtosis_excess(saspoint5<%1%>&)",
+        "The SaS point5 distribution does not have a kurtosis: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType entropy(const saspoint5_distribution<RealType, Policy>& dist)
+{
+    return detail::saspoint5_entropy_imp(dist);
+}
+
+}} // namespaces
+
+
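+// (Editor's aside, not part of the patch.) The point of marking everything
+// above BOOST_MATH_GPU_ENABLED is that the distribution becomes callable from
+// device code as well as host code. A hypothetical CUDA kernel -- all names
+// here are illustrative only:
+//
+//     __global__ void sas_cdf(const double* in, double* out, int n)
+//     {
+//         int i = blockIdx.x * blockDim.x + threadIdx.x;
+//         boost::math::saspoint5<> d(0.0, 1.0); // location 0, scale 1
+//         if (i < n) { out[i] = cdf(d, in[i]); }
+//     }
+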
+#endif // BOOST_STATS_SASPOINT5_HPP
diff --git a/include/boost/math/distributions/students_t.hpp b/include/boost/math/distributions/students_t.hpp
index b01b8aa0fc..39f20d6e41 100644
--- a/include/boost/math/distributions/students_t.hpp
+++ b/include/boost/math/distributions/students_t.hpp
@@ -1,7 +1,7 @@
 // Copyright John Maddock 2006.
 // Copyright Paul A. Bristow 2006, 2012, 2017.
 // Copyright Thomas Mang 2012.
-
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -12,14 +12,17 @@
 // http://en.wikipedia.org/wiki/Student%27s_t_distribution
 // http://www.itl.nist.gov/div898/handbook/eda/section3/eda3664.htm
 
+#include <boost/math/tools/config.hpp>
+#include <boost/math/tools/cstdint.hpp>
+#include <boost/math/tools/numeric_limits.hpp>
+#include <boost/math/tools/tuple.hpp>
 #include <boost/math/distributions/fwd.hpp>
 #include <boost/math/special_functions/beta.hpp> // for ibeta(a, b, x).
 #include <boost/math/distributions/complement.hpp>
 #include <boost/math/distributions/detail/common_error_handling.hpp>
 #include <boost/math/distributions/normal.hpp>
 #include <boost/math/special_functions/digamma.hpp>
-
-#include <utility>
+#include <boost/math/policies/error_handling.hpp>
 
 #ifdef _MSC_VER
 # pragma warning(push)
@@ -35,20 +38,20 @@ class students_t_distribution
    typedef RealType value_type;
    typedef Policy policy_type;
 
-   students_t_distribution(RealType df) : df_(df)
+   BOOST_MATH_GPU_ENABLED students_t_distribution(RealType df) : df_(df)
    { // Constructor.
       RealType result;
       detail::check_df_gt0_to_inf( // Checks that df > 0 or df == inf.
         "boost::math::students_t_distribution<%1%>::students_t_distribution", df_, &result, Policy());
    } // students_t_distribution
 
-   RealType degrees_of_freedom()const
+   BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom()const
    {
       return df_;
    }
 
    // Parameter estimation:
-   static RealType find_degrees_of_freedom(
+   BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(
       RealType difference_from_mean,
       RealType alpha,
       RealType beta,
@@ -68,26 +71,26 @@ students_t_distribution(RealType)->students_t_distribution
 
 template <class RealType, class Policy>
-inline const std::pair<RealType, RealType> range(const students_t_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const students_t_distribution<RealType, Policy>& /*dist*/)
 { // Range of permissible values for random variable x.
   // Now including infinity.
    using boost::math::tools::max_value;
-   //return std::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>());
-   return std::pair<RealType, RealType>(((::std::numeric_limits<RealType>::is_specialized & ::std::numeric_limits<RealType>::has_infinity) ? -std::numeric_limits<RealType>::infinity() : -max_value<RealType>()), ((::std::numeric_limits<RealType>::is_specialized & ::std::numeric_limits<RealType>::has_infinity) ? +std::numeric_limits<RealType>::infinity() : +max_value<RealType>()));
+   //return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(((::boost::math::numeric_limits<RealType>::is_specialized & ::boost::math::numeric_limits<RealType>::has_infinity) ? -boost::math::numeric_limits<RealType>::infinity() : -max_value<RealType>()), ((::boost::math::numeric_limits<RealType>::is_specialized & ::boost::math::numeric_limits<RealType>::has_infinity) ? +boost::math::numeric_limits<RealType>::infinity() : +max_value<RealType>()));
 }
 
 template <class RealType, class Policy>
-inline const std::pair<RealType, RealType> support(const students_t_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const students_t_distribution<RealType, Policy>& /*dist*/)
 { // Range of supported values for random variable x.
   // Now including infinity.
   // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
   using boost::math::tools::max_value;
-   //return std::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>());
-   return std::pair<RealType, RealType>(((::std::numeric_limits<RealType>::is_specialized & ::std::numeric_limits<RealType>::has_infinity) ? -std::numeric_limits<RealType>::infinity() : -max_value<RealType>()), ((::std::numeric_limits<RealType>::is_specialized & ::std::numeric_limits<RealType>::has_infinity) ? +std::numeric_limits<RealType>::infinity() : +max_value<RealType>()));
+   //return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(((::boost::math::numeric_limits<RealType>::is_specialized & ::boost::math::numeric_limits<RealType>::has_infinity) ? -boost::math::numeric_limits<RealType>::infinity() : -max_value<RealType>()), ((::boost::math::numeric_limits<RealType>::is_specialized & ::boost::math::numeric_limits<RealType>::has_infinity) ? +boost::math::numeric_limits<RealType>::infinity() : +max_value<RealType>()));
 }
 
 template <class RealType, class Policy>
-inline RealType pdf(const students_t_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const students_t_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_FPU_EXCEPTION_GUARD
    BOOST_MATH_STD_USING  // for ADL of std functions.
@@ -135,7 +138,7 @@ inline RealType pdf(const students_t_distribution<RealType, Policy>& dist, const
 } // pdf
 
 template <class RealType, class Policy>
-inline RealType cdf(const students_t_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const students_t_distribution<RealType, Policy>& dist, const RealType& x)
 {
    RealType error_result;
    // degrees_of_freedom > 0 or infinity check:
@@ -209,7 +212,7 @@ inline RealType cdf(const students_t_distribution<RealType, Policy>& dist, const
 } // cdf
 
 template <class RealType, class Policy>
-inline RealType quantile(const students_t_distribution<RealType, Policy>& dist, const RealType& p)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const students_t_distribution<RealType, Policy>& dist, const RealType& p)
 {
    BOOST_MATH_STD_USING // for ADL of std functions
    //
@@ -218,7 +221,7 @@ inline RealType quantile(const students_t_distribution<RealType, Policy>& dist,
    // Check for domain errors:
    RealType df = dist.degrees_of_freedom();
-   static const char* function = "boost::math::quantile(const students_t_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const students_t_distribution<%1%>&, %1%)";
    RealType error_result;
    if(false == (detail::check_df_gt0_to_inf( // Check that df > 0 or == +infinity.
      function, df, &error_result, Policy())
@@ -263,13 +266,13 @@ inline RealType quantile(const students_t_distribution<RealType, Policy>& dist,
 } // quantile
 
 template <class RealType, class Policy>
-inline RealType cdf(const complemented2_type<students_t_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<students_t_distribution<RealType, Policy>, RealType>& c)
 {
    return cdf(c.dist, -c.param);
 }
 
 template <class RealType, class Policy>
-inline RealType quantile(const complemented2_type<students_t_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<students_t_distribution<RealType, Policy>, RealType>& c)
 {
    return -quantile(c.dist, c.param);
 }
@@ -284,10 +287,10 @@ namespace detail{
 template <class RealType, class Policy>
 struct sample_size_func
 {
-   sample_size_func(RealType a, RealType b, RealType s, RealType d)
+   BOOST_MATH_GPU_ENABLED sample_size_func(RealType a, RealType b, RealType s, RealType d)
       : alpha(a), beta(b), ratio(s*s/(d*d)) {}
 
-   RealType operator()(const RealType& df)
+   BOOST_MATH_GPU_ENABLED RealType operator()(const RealType& df)
    {
       if(df <= tools::min_value<RealType>())
       { //
@@ -308,14 +311,14 @@ struct sample_size_func
 } // namespace detail
 
 template <class RealType, class Policy>
-RealType students_t_distribution<RealType, Policy>::find_degrees_of_freedom(
+BOOST_MATH_GPU_ENABLED RealType students_t_distribution<RealType, Policy>::find_degrees_of_freedom(
       RealType difference_from_mean,
       RealType alpha,
       RealType beta,
      RealType sd,
      RealType hint)
 {
-   static const char* function = "boost::math::students_t_distribution<%1%>::find_degrees_of_freedom";
+   constexpr auto function = "boost::math::students_t_distribution<%1%>::find_degrees_of_freedom";
    //
    // Check for domain errors:
    //
@@ -330,8 +333,8 @@ RealType students_t_distribution<RealType, Policy>::find_degrees_of_freedom(
 
    detail::sample_size_func<RealType, Policy> f(alpha, beta, sd, difference_from_mean);
    tools::eps_tolerance<RealType> tol(policies::digits<RealType, Policy>());
-   std::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
-   std::pair<RealType, RealType> r = tools::bracket_and_solve_root(f, hint, RealType(2), false, tol, max_iter, Policy());
+   boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
+   boost::math::pair<RealType, RealType> r = tools::bracket_and_solve_root(f, hint, RealType(2), false, tol, max_iter, Policy());
    RealType result = r.first + (r.second - r.first) / 2;
    if(max_iter >= policies::get_max_root_iterations<Policy>())
    {
@@ -342,14 +345,14 @@ RealType students_t_distribution<RealType, Policy>::find_degrees_of_freedom(
 }
 
 template <class RealType, class Policy>
-inline RealType mode(const students_t_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType mode(const students_t_distribution<RealType, Policy>& /*dist*/)
 {
   // Assume no checks on degrees of freedom are useful (unlike mean).
    return 0; // Always zero by definition.
 }
 
 template <class RealType, class Policy>
-inline RealType median(const students_t_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType median(const students_t_distribution<RealType, Policy>& /*dist*/)
 {
   // Assume no checks on degrees of freedom are useful (unlike mean).
    return 0; // Always zero by definition.
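+// (Editor's sketch, not part of the patch.) find_degrees_of_freedom above
+// answers "how many degrees of freedom make a t-test this sensitive?"; the
+// numbers below are illustrative only:
+//
+//     double df = boost::math::students_t::find_degrees_of_freedom(
+//         1.3,   // difference from the mean to detect
+//         0.05,  // alpha: acceptable risk of a false positive
+//         0.1,   // beta: acceptable risk of a false negative
+//         1.3,   // sd: the sample standard deviation
+//         100);  // hint: starting point for the iteration
+//     // then round up to an integral sample size, e.g. std::ceil(df) + 1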
@@ -358,7 +361,7 @@ inline RealType median(const students_t_distribution<RealType, Policy>& /*dist*/
 
 // See section 5.1 on moments at http://en.wikipedia.org/wiki/Student%27s_t-distribution
 template <class RealType, class Policy>
-inline RealType mean(const students_t_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mean(const students_t_distribution<RealType, Policy>& dist)
 {  // Revised for https://svn.boost.org/trac/boost/ticket/7177
    RealType df = dist.degrees_of_freedom();
    if(((boost::math::isnan)(df)) || (df <= 1) )
@@ -366,13 +369,13 @@ inline RealType mean(const students_t_distribution<RealType, Policy>& dist)
       return policies::raise_domain_error<RealType>(
       "boost::math::mean(students_t_distribution<%1%> const&, %1%)",
       "Mean is undefined for degrees of freedom < 1 but got %1%.", df, Policy());
-      return std::numeric_limits<RealType>::quiet_NaN();
+      return boost::math::numeric_limits<RealType>::quiet_NaN();
    }
    return 0;
 } // mean
 
 template <class RealType, class Policy>
-inline RealType variance(const students_t_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType variance(const students_t_distribution<RealType, Policy>& dist)
 { // http://en.wikipedia.org/wiki/Student%27s_t-distribution
   // Revised for https://svn.boost.org/trac/boost/ticket/7177
    RealType df = dist.degrees_of_freedom();
@@ -382,7 +385,7 @@ inline RealType variance(const students_t_distribution<RealType, Policy>& dist)
       "boost::math::variance(students_t_distribution<%1%> const&, %1%)",
       "variance is undefined for degrees of freedom <= 2, but got %1%.",
       df, Policy());
-      return std::numeric_limits<RealType>::quiet_NaN(); // Undefined.
+      return boost::math::numeric_limits<RealType>::quiet_NaN(); // Undefined.
    }
    if ((boost::math::isinf)(df))
    { // +infinity.
@@ -404,7 +407,7 @@ inline RealType variance(const students_t_distribution<RealType, Policy>& dist)
 } // variance
 
 template <class RealType, class Policy>
-inline RealType skewness(const students_t_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const students_t_distribution<RealType, Policy>& dist)
 {
    RealType df = dist.degrees_of_freedom();
    if( ((boost::math::isnan)(df)) || (dist.degrees_of_freedom() <= 3))
@@ -413,13 +416,13 @@ inline RealType skewness(const students_t_distribution<RealType, Policy>& dist)
       "boost::math::skewness(students_t_distribution<%1%> const&, %1%)",
       "Skewness is undefined for degrees of freedom <= 3, but got %1%.",
       dist.degrees_of_freedom(), Policy());
-      return std::numeric_limits<RealType>::quiet_NaN();
+      return boost::math::numeric_limits<RealType>::quiet_NaN();
    }
    return 0; // For all valid df, including infinity.
 } // skewness
 
 template <class RealType, class Policy>
-inline RealType kurtosis(const students_t_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const students_t_distribution<RealType, Policy>& dist)
 {
    RealType df = dist.degrees_of_freedom();
    if(((boost::math::isnan)(df)) || (df <= 4))
@@ -428,7 +431,7 @@ inline RealType kurtosis(const students_t_distribution<RealType, Policy>& dist)
       "boost::math::kurtosis(students_t_distribution<%1%> const&, %1%)",
       "Kurtosis is undefined for degrees of freedom <= 4, but got %1%.",
       df, Policy());
-      return std::numeric_limits<RealType>::quiet_NaN(); // Undefined.
+      return boost::math::numeric_limits<RealType>::quiet_NaN(); // Undefined.
    }
   if ((boost::math::isinf)(df))
   { // +infinity.
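+     // (Editor's note.) In the df -> infinity limit Student's t degenerates
+     // to the normal distribution, so the lines elided from this hunk
+     // presumably return the normal kurtosis of 3 here.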
@@ -451,7 +454,7 @@ inline RealType kurtosis(const students_t_distribution& dist) } // kurtosis template -inline RealType kurtosis_excess(const students_t_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const students_t_distribution& dist) { // see http://mathworld.wolfram.com/Kurtosis.html @@ -462,7 +465,7 @@ inline RealType kurtosis_excess(const students_t_distribution& "boost::math::kurtosis_excess(students_t_distribution<%1%> const&, %1%)", "Kurtosis_excess is undefined for degrees of freedom <= 4, but got %1%.", df, Policy()); - return std::numeric_limits::quiet_NaN(); // Undefined. + return boost::math::numeric_limits::quiet_NaN(); // Undefined. } if ((boost::math::isinf)(df)) { // +infinity. @@ -484,10 +487,9 @@ inline RealType kurtosis_excess(const students_t_distribution& } template -inline RealType entropy(const students_t_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType entropy(const students_t_distribution& dist) { - using std::log; - using std::sqrt; + BOOST_MATH_STD_USING RealType v = dist.degrees_of_freedom(); RealType vp1 = (v+1)/2; RealType vd2 = v/2; diff --git a/include/boost/math/distributions/triangular.hpp b/include/boost/math/distributions/triangular.hpp index 950d78147f..b333ddbc31 100644 --- a/include/boost/math/distributions/triangular.hpp +++ b/include/boost/math/distributions/triangular.hpp @@ -16,20 +16,20 @@ // http://en.wikipedia.org/wiki/Triangular_distribution +#include +#include #include #include #include #include #include -#include - namespace boost{ namespace math { namespace detail { template - inline bool check_triangular_lower( + BOOST_MATH_GPU_ENABLED inline bool check_triangular_lower( const char* function, RealType lower, RealType* result, const Policy& pol) @@ -48,7 +48,7 @@ namespace boost{ namespace math } // bool check_triangular_lower( template - inline bool check_triangular_mode( + BOOST_MATH_GPU_ENABLED inline bool check_triangular_mode( const char* function, RealType mode, RealType* result, const Policy& pol) @@ -67,7 +67,7 @@ namespace boost{ namespace math } // bool check_triangular_mode( template - inline bool check_triangular_upper( + BOOST_MATH_GPU_ENABLED inline bool check_triangular_upper( const char* function, RealType upper, RealType* result, const Policy& pol) @@ -86,7 +86,7 @@ namespace boost{ namespace math } // bool check_triangular_upper( template - inline bool check_triangular_x( + BOOST_MATH_GPU_ENABLED inline bool check_triangular_x( const char* function, RealType const& x, RealType* result, const Policy& pol) @@ -105,7 +105,7 @@ namespace boost{ namespace math } // bool check_triangular_x template - inline bool check_triangular( + BOOST_MATH_GPU_ENABLED inline bool check_triangular( const char* function, RealType lower, RealType mode, @@ -153,7 +153,7 @@ namespace boost{ namespace math typedef RealType value_type; typedef Policy policy_type; - triangular_distribution(RealType l_lower = -1, RealType l_mode = 0, RealType l_upper = 1) + BOOST_MATH_GPU_ENABLED triangular_distribution(RealType l_lower = -1, RealType l_mode = 0, RealType l_upper = 1) : m_lower(l_lower), m_mode(l_mode), m_upper(l_upper) // Constructor. { // Evans says 'standard triangular' is lower 0, mode 1/2, upper 1, // has median sqrt(c/2) for c <=1/2 and 1 - sqrt(1-c)/2 for c >= 1/2 @@ -163,15 +163,15 @@ namespace boost{ namespace math detail::check_triangular("boost::math::triangular_distribution<%1%>::triangular_distribution",l_lower, l_mode, l_upper, &result, Policy()); } // Accessor functions. 
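For reference, the quantity the entropy hunk above evaluates (with v the degrees of freedom, psi the digamma function and B the beta function) is the standard Student's t entropy; the vp1 and vd2 locals are the two half-arguments:

```latex
H(v) \;=\; \frac{v+1}{2}\left[\psi\!\left(\frac{v+1}{2}\right)-\psi\!\left(\frac{v}{2}\right)\right]
\;+\; \ln\!\left(\sqrt{v}\;B\!\left(\frac{v}{2},\frac{1}{2}\right)\right).
```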
- RealType lower()const + BOOST_MATH_GPU_ENABLED RealType lower()const { return m_lower; } - RealType mode()const + BOOST_MATH_GPU_ENABLED RealType mode()const { return m_mode; } - RealType upper()const + BOOST_MATH_GPU_ENABLED RealType upper()const { return m_upper; } @@ -194,23 +194,23 @@ namespace boost{ namespace math #endif template - inline const std::pair range(const triangular_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair range(const triangular_distribution& /* dist */) { // Range of permissible values for random variable x. using boost::math::tools::max_value; - return std::pair(-max_value(), max_value()); + return boost::math::pair(-max_value(), max_value()); } template - inline const std::pair support(const triangular_distribution& dist) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair support(const triangular_distribution& dist) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. - return std::pair(dist.lower(), dist.upper()); + return boost::math::pair(dist.lower(), dist.upper()); } template - RealType pdf(const triangular_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED RealType pdf(const triangular_distribution& dist, const RealType& x) { - static const char* function = "boost::math::pdf(const triangular_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::pdf(const triangular_distribution<%1%>&, %1%)"; RealType lower = dist.lower(); RealType mode = dist.mode(); RealType upper = dist.upper(); @@ -246,9 +246,9 @@ namespace boost{ namespace math } // RealType pdf(const triangular_distribution& dist, const RealType& x) template - inline RealType cdf(const triangular_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const triangular_distribution& dist, const RealType& x) { - static const char* function = "boost::math::cdf(const triangular_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const triangular_distribution<%1%>&, %1%)"; RealType lower = dist.lower(); RealType mode = dist.mode(); RealType upper = dist.upper(); @@ -281,10 +281,10 @@ namespace boost{ namespace math } // RealType cdf(const triangular_distribution& dist, const RealType& x) template - RealType quantile(const triangular_distribution& dist, const RealType& p) + BOOST_MATH_GPU_ENABLED RealType quantile(const triangular_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std functions (sqrt). 
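The pdf hunk above implements the standard triangular density; with lower limit a, mode c and upper limit b it is:

```latex
f(x) =
\begin{cases}
0, & x < a \ \text{or}\ x > b,\\[2pt]
\dfrac{2(x-a)}{(b-a)(c-a)}, & a \le x \le c,\\[6pt]
\dfrac{2(b-x)}{(b-a)(b-c)}, & c < x \le b,
\end{cases}
```

which peaks at f(c) = 2/(b - a); the degenerate cases c = a and c = b are handled by the branch checks in the code.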
- static const char* function = "boost::math::quantile(const triangular_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const triangular_distribution<%1%>&, %1%)"; RealType lower = dist.lower(); RealType mode = dist.mode(); RealType upper = dist.upper(); @@ -324,9 +324,9 @@ namespace boost{ namespace math } // RealType quantile(const triangular_distribution& dist, const RealType& q) template - RealType cdf(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { - static const char* function = "boost::math::cdf(const triangular_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const triangular_distribution<%1%>&, %1%)"; RealType lower = c.dist.lower(); RealType mode = c.dist.mode(); RealType upper = c.dist.upper(); @@ -359,10 +359,10 @@ namespace boost{ namespace math } // RealType cdf(const complemented2_type, RealType>& c) template - RealType quantile(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // Aid ADL for sqrt. - static const char* function = "boost::math::quantile(const triangular_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const triangular_distribution<%1%>&, %1%)"; RealType l = c.dist.lower(); RealType m = c.dist.mode(); RealType u = c.dist.upper(); @@ -408,9 +408,9 @@ namespace boost{ namespace math } // RealType quantile(const complemented2_type, RealType>& c) template - inline RealType mean(const triangular_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mean(const triangular_distribution& dist) { - static const char* function = "boost::math::mean(const triangular_distribution<%1%>&)"; + constexpr auto function = "boost::math::mean(const triangular_distribution<%1%>&)"; RealType lower = dist.lower(); RealType mode = dist.mode(); RealType upper = dist.upper(); @@ -424,9 +424,9 @@ namespace boost{ namespace math template - inline RealType variance(const triangular_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType variance(const triangular_distribution& dist) { - static const char* function = "boost::math::mean(const triangular_distribution<%1%>&)"; + constexpr auto function = "boost::math::mean(const triangular_distribution<%1%>&)"; RealType lower = dist.lower(); RealType mode = dist.mode(); RealType upper = dist.upper(); @@ -439,9 +439,9 @@ namespace boost{ namespace math } // RealType variance(const triangular_distribution& dist) template - inline RealType mode(const triangular_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mode(const triangular_distribution& dist) { - static const char* function = "boost::math::mode(const triangular_distribution<%1%>&)"; + constexpr auto function = "boost::math::mode(const triangular_distribution<%1%>&)"; RealType mode = dist.mode(); RealType result = 0; // of checks. if(false == detail::check_triangular_mode(function, mode, &result, Policy())) @@ -452,10 +452,10 @@ namespace boost{ namespace math } // RealType mode template - inline RealType median(const triangular_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType median(const triangular_distribution& dist) { BOOST_MATH_STD_USING // ADL of std functions. - static const char* function = "boost::math::median(const triangular_distribution<%1%>&)"; + constexpr auto function = "boost::math::median(const triangular_distribution<%1%>&)"; RealType mode = dist.mode(); RealType result = 0; // of checks. 
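The complemented overloads above make quantile(complement(dist, q)) the numerically preferred way to get upper-tail points, since it avoids computing 1 - p explicitly. A host-side sketch using the double typedef:

```cpp
#include <boost/math/distributions/triangular.hpp>
#include <iostream>

int main()
{
    boost::math::triangular tri(0.0, 0.3, 1.0); // lower, mode, upper

    double q = 0.05;
    double x = quantile(complement(tri, q));      // upper 5% point
    std::cout << x << '\n';
    std::cout << cdf(complement(tri, x)) << '\n'; // round trip: ~0.05
}
```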
if(false == detail::check_triangular_mode(function, mode, &result, Policy())) @@ -475,11 +475,11 @@ namespace boost{ namespace math } // RealType mode template - inline RealType skewness(const triangular_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType skewness(const triangular_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions using namespace boost::math::constants; // for root_two - static const char* function = "boost::math::skewness(const triangular_distribution<%1%>&)"; + constexpr auto function = "boost::math::skewness(const triangular_distribution<%1%>&)"; RealType lower = dist.lower(); RealType mode = dist.mode(); @@ -496,9 +496,9 @@ namespace boost{ namespace math } // RealType skewness(const triangular_distribution& dist) template - inline RealType kurtosis(const triangular_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const triangular_distribution& dist) { // These checks may be belt and braces as should have been checked on construction? - static const char* function = "boost::math::kurtosis(const triangular_distribution<%1%>&)"; + constexpr auto function = "boost::math::kurtosis(const triangular_distribution<%1%>&)"; RealType lower = dist.lower(); RealType upper = dist.upper(); RealType mode = dist.mode(); @@ -511,9 +511,9 @@ namespace boost{ namespace math } // RealType kurtosis_excess(const triangular_distribution& dist) template - inline RealType kurtosis_excess(const triangular_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const triangular_distribution& dist) { // These checks may be belt and braces as should have been checked on construction? - static const char* function = "boost::math::kurtosis_excess(const triangular_distribution<%1%>&)"; + constexpr auto function = "boost::math::kurtosis_excess(const triangular_distribution<%1%>&)"; RealType lower = dist.lower(); RealType upper = dist.upper(); RealType mode = dist.mode(); @@ -527,9 +527,9 @@ namespace boost{ namespace math } template - inline RealType entropy(const triangular_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType entropy(const triangular_distribution& dist) { - using std::log; + BOOST_MATH_STD_USING return constants::half() + log((dist.upper() - dist.lower())/2); } diff --git a/include/boost/math/distributions/uniform.hpp b/include/boost/math/distributions/uniform.hpp index f57f8cc9f1..328fc61330 100644 --- a/include/boost/math/distributions/uniform.hpp +++ b/include/boost/math/distributions/uniform.hpp @@ -1,5 +1,6 @@ // Copyright John Maddock 2006. // Copyright Paul A. Bristow 2006. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -15,18 +16,18 @@ // http://documents.wolfram.com/calculationcenter/v2/Functions/ListsMatrices/Statistics/UniformDistribution.html // http://en.wikipedia.org/wiki/Uniform_distribution_%28continuous%29 +#include +#include #include #include #include -#include - namespace boost{ namespace math { namespace detail { template - inline bool check_uniform_lower( + BOOST_MATH_GPU_ENABLED inline bool check_uniform_lower( const char* function, RealType lower, RealType* result, const Policy& pol) @@ -45,7 +46,7 @@ namespace boost{ namespace math } // bool check_uniform_lower( template - inline bool check_uniform_upper( + BOOST_MATH_GPU_ENABLED inline bool check_uniform_upper( const char* function, RealType upper, RealType* result, const Policy& pol) @@ -64,7 +65,7 @@ namespace boost{ namespace math } // bool check_uniform_upper( template - inline bool check_uniform_x( + BOOST_MATH_GPU_ENABLED inline bool check_uniform_x( const char* function, RealType const& x, RealType* result, const Policy& pol) @@ -83,7 +84,7 @@ namespace boost{ namespace math } // bool check_uniform_x template - inline bool check_uniform( + BOOST_MATH_GPU_ENABLED inline bool check_uniform( const char* function, RealType lower, RealType upper, @@ -116,19 +117,19 @@ namespace boost{ namespace math typedef RealType value_type; typedef Policy policy_type; - uniform_distribution(RealType l_lower = 0, RealType l_upper = 1) // Constructor. + BOOST_MATH_GPU_ENABLED uniform_distribution(RealType l_lower = 0, RealType l_upper = 1) // Constructor. : m_lower(l_lower), m_upper(l_upper) // Default is standard uniform distribution. { RealType result; detail::check_uniform("boost::math::uniform_distribution<%1%>::uniform_distribution", l_lower, l_upper, &result, Policy()); } // Accessor functions. - RealType lower()const + BOOST_MATH_GPU_ENABLED RealType lower()const { return m_lower; } - RealType upper()const + BOOST_MATH_GPU_ENABLED RealType upper()const { return m_upper; } @@ -148,23 +149,23 @@ namespace boost{ namespace math #endif template - inline const std::pair range(const uniform_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair range(const uniform_distribution& /* dist */) { // Range of permissible values for random variable x. using boost::math::tools::max_value; - return std::pair(-max_value(), max_value()); // - to + 'infinity'. + return boost::math::pair(-max_value(), max_value()); // - to + 'infinity'. // Note RealType infinity is NOT permitted, only max_value. } template - inline const std::pair support(const uniform_distribution& dist) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair support(const uniform_distribution& dist) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. 
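The check_uniform helpers above reject non-finite bounds and invalid orderings (e.g. lower above upper) before construction completes; with the default error policy that surfaces as a std::domain_error. A sketch:

```cpp
#include <boost/math/distributions/uniform.hpp>
#include <iostream>
#include <stdexcept>

int main()
{
    try {
        boost::math::uniform bad(1.0, 0.0); // lower > upper: rejected by check_uniform
    } catch (const std::domain_error& e) {
        std::cout << e.what() << '\n';
    }
}
```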
using boost::math::tools::max_value; - return std::pair(dist.lower(), dist.upper()); + return boost::math::pair(dist.lower(), dist.upper()); } template - inline RealType pdf(const uniform_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType pdf(const uniform_distribution& dist, const RealType& x) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -189,7 +190,7 @@ namespace boost{ namespace math } // RealType pdf(const uniform_distribution& dist, const RealType& x) template - inline RealType cdf(const uniform_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const uniform_distribution& dist, const RealType& x) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -214,7 +215,7 @@ namespace boost{ namespace math } // RealType cdf(const uniform_distribution& dist, const RealType& x) template - inline RealType quantile(const uniform_distribution& dist, const RealType& p) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const uniform_distribution& dist, const RealType& p) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -239,7 +240,7 @@ namespace boost{ namespace math } // RealType quantile(const uniform_distribution& dist, const RealType& p) template - inline RealType cdf(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { RealType lower = c.dist.lower(); RealType upper = c.dist.upper(); @@ -265,7 +266,7 @@ namespace boost{ namespace math } // RealType cdf(const complemented2_type, RealType>& c) template - inline RealType quantile(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { RealType lower = c.dist.lower(); RealType upper = c.dist.upper(); @@ -291,7 +292,7 @@ namespace boost{ namespace math } // RealType quantile(const complemented2_type, RealType>& c) template - inline RealType mean(const uniform_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mean(const uniform_distribution& dist) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -304,7 +305,7 @@ namespace boost{ namespace math } // RealType mean(const uniform_distribution& dist) template - inline RealType variance(const uniform_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType variance(const uniform_distribution& dist) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -318,7 +319,7 @@ namespace boost{ namespace math } // RealType variance(const uniform_distribution& dist) template - inline RealType mode(const uniform_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mode(const uniform_distribution& dist) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -332,7 +333,7 @@ namespace boost{ namespace math } template - inline RealType median(const uniform_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType median(const uniform_distribution& dist) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -344,7 +345,7 @@ namespace boost{ namespace math return (lower + upper) / 2; // } template - inline RealType skewness(const uniform_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType skewness(const uniform_distribution& dist) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -357,7 +358,7 @@ namespace boost{ namespace math } // RealType skewness(const uniform_distribution& dist) template - inline RealType kurtosis_excess(const uniform_distribution& 
dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const uniform_distribution& dist) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -370,15 +371,15 @@ namespace boost{ namespace math } // RealType kurtosis_excess(const uniform_distribution& dist) template - inline RealType kurtosis(const uniform_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const uniform_distribution& dist) { return kurtosis_excess(dist) + 3; } template - inline RealType entropy(const uniform_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType entropy(const uniform_distribution& dist) { - using std::log; + BOOST_MATH_STD_USING return log(dist.upper() - dist.lower()); } diff --git a/include/boost/math/distributions/weibull.hpp b/include/boost/math/distributions/weibull.hpp index ca4bbd7b53..eb4de106c8 100644 --- a/include/boost/math/distributions/weibull.hpp +++ b/include/boost/math/distributions/weibull.hpp @@ -1,4 +1,5 @@ // Copyright John Maddock 2006. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -9,6 +10,10 @@ // http://www.itl.nist.gov/div898/handbook/eda/section3/eda3668.htm // http://mathworld.wolfram.com/WeibullDistribution.html +#include +#include +#include +#include #include #include #include @@ -16,14 +21,12 @@ #include #include -#include - namespace boost{ namespace math { namespace detail{ template -inline bool check_weibull_shape( +BOOST_MATH_GPU_ENABLED inline bool check_weibull_shape( const char* function, RealType shape, RealType* result, const Policy& pol) @@ -39,7 +42,7 @@ inline bool check_weibull_shape( } template -inline bool check_weibull_x( +BOOST_MATH_GPU_ENABLED inline bool check_weibull_x( const char* function, RealType const& x, RealType* result, const Policy& pol) @@ -55,7 +58,7 @@ inline bool check_weibull_x( } template -inline bool check_weibull( +BOOST_MATH_GPU_ENABLED inline bool check_weibull( const char* function, RealType scale, RealType shape, @@ -73,19 +76,19 @@ class weibull_distribution using value_type = RealType; using policy_type = Policy; - explicit weibull_distribution(RealType l_shape, RealType l_scale = 1) + BOOST_MATH_GPU_ENABLED explicit weibull_distribution(RealType l_shape, RealType l_scale = 1) : m_shape(l_shape), m_scale(l_scale) { RealType result; detail::check_weibull("boost::math::weibull_distribution<%1%>::weibull_distribution", l_scale, l_shape, &result, Policy()); } - RealType shape()const + BOOST_MATH_GPU_ENABLED RealType shape()const { return m_shape; } - RealType scale()const + BOOST_MATH_GPU_ENABLED RealType scale()const { return m_scale; } @@ -107,28 +110,28 @@ weibull_distribution(RealType,RealType)->weibull_distribution -inline std::pair range(const weibull_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline boost::math::pair range(const weibull_distribution& /*dist*/) { // Range of permissible values for random variable x. using boost::math::tools::max_value; - return std::pair(static_cast(0), max_value()); + return boost::math::pair(static_cast(0), max_value()); } template -inline std::pair support(const weibull_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline boost::math::pair support(const weibull_distribution& /*dist*/) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. 
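Taken together, the uniform hunks above GPU-annotate the whole interface without changing the mathematics. A quick host-side sanity sketch of the closed forms they compute, before the weibull hunks continue:

```cpp
#include <boost/math/distributions/uniform.hpp>
#include <cmath>
#include <iostream>

int main()
{
    boost::math::uniform u(2.0, 6.0);

    std::cout << pdf(u, 3.0) << '\n';                   // 1/(6-2) = 0.25
    std::cout << quantile(u, cdf(u, 3.0)) << '\n';      // round trip: 3
    std::cout << mean(u) << ' ' << variance(u) << '\n'; // 4 and (6-2)^2/12
    std::cout << kurtosis_excess(u) << '\n';            // -6/5 for every uniform
    std::cout << entropy(u) << ' '
              << std::log(6.0 - 2.0) << '\n';           // both are ln(upper - lower)
}
```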
using boost::math::tools::max_value; using boost::math::tools::min_value; - return std::pair(min_value(), max_value()); + return boost::math::pair(min_value(), max_value()); // A discontinuity at x == 0, so only support down to min_value. } template -inline RealType pdf(const weibull_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType pdf(const weibull_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::pdf(const weibull_distribution<%1%>, %1%)"; + constexpr auto function = "boost::math::pdf(const weibull_distribution<%1%>, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -158,11 +161,11 @@ inline RealType pdf(const weibull_distribution& dist, const Re } template -inline RealType logpdf(const weibull_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType logpdf(const weibull_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::logpdf(const weibull_distribution<%1%>, %1%)"; + constexpr auto function = "boost::math::logpdf(const weibull_distribution<%1%>, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -192,11 +195,11 @@ inline RealType logpdf(const weibull_distribution& dist, const } template -inline RealType cdf(const weibull_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const weibull_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(const weibull_distribution<%1%>, %1%)"; + constexpr auto function = "boost::math::cdf(const weibull_distribution<%1%>, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -213,11 +216,11 @@ inline RealType cdf(const weibull_distribution& dist, const Re } template -inline RealType logcdf(const weibull_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType logcdf(const weibull_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::logcdf(const weibull_distribution<%1%>, %1%)"; + constexpr auto function = "boost::math::logcdf(const weibull_distribution<%1%>, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -234,11 +237,11 @@ inline RealType logcdf(const weibull_distribution& dist, const } template -inline RealType quantile(const weibull_distribution& dist, const RealType& p) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const weibull_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const weibull_distribution<%1%>, %1%)"; + constexpr auto function = "boost::math::quantile(const weibull_distribution<%1%>, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -258,11 +261,11 @@ inline RealType quantile(const weibull_distribution& dist, con } template -inline RealType cdf(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(const weibull_distribution<%1%>, %1%)"; + constexpr auto function = "boost::math::cdf(const weibull_distribution<%1%>, %1%)"; RealType shape = c.dist.shape(); RealType scale = c.dist.scale(); @@ -279,11 +282,11 @@ inline RealType 
cdf(const complemented2_type -inline RealType logcdf(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType logcdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::logcdf(const weibull_distribution<%1%>, %1%)"; + constexpr auto function = "boost::math::logcdf(const weibull_distribution<%1%>, %1%)"; RealType shape = c.dist.shape(); RealType scale = c.dist.scale(); @@ -300,11 +303,11 @@ inline RealType logcdf(const complemented2_type -inline RealType quantile(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const weibull_distribution<%1%>, %1%)"; + constexpr auto function = "boost::math::quantile(const weibull_distribution<%1%>, %1%)"; RealType shape = c.dist.shape(); RealType scale = c.dist.scale(); @@ -325,11 +328,11 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const weibull_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mean(const weibull_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::mean(const weibull_distribution<%1%>)"; + constexpr auto function = "boost::math::mean(const weibull_distribution<%1%>)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -343,12 +346,12 @@ inline RealType mean(const weibull_distribution& dist) } template -inline RealType variance(const weibull_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType variance(const weibull_distribution& dist) { RealType shape = dist.shape(); RealType scale = dist.scale(); - static const char* function = "boost::math::variance(const weibull_distribution<%1%>)"; + constexpr auto function = "boost::math::variance(const weibull_distribution<%1%>)"; RealType result = 0; if(false == detail::check_weibull(function, scale, shape, &result, Policy())) @@ -363,11 +366,11 @@ inline RealType variance(const weibull_distribution& dist) } template -inline RealType mode(const weibull_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mode(const weibull_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std function pow. - static const char* function = "boost::math::mode(const weibull_distribution<%1%>)"; + constexpr auto function = "boost::math::mode(const weibull_distribution<%1%>)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -384,11 +387,11 @@ inline RealType mode(const weibull_distribution& dist) } template -inline RealType median(const weibull_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType median(const weibull_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std function pow. 
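With k = shape and lambda = scale (the Wikipedia naming the code comments already use), the weibull overloads above compute the standard quantities; the median hunk that follows is just the quantile at p = 1/2, i.e. lambda (ln 2)^(1/k), and the mode formula applies for k > 1 (it is 0 otherwise):

```latex
f(x) = \frac{k}{\lambda}\left(\frac{x}{\lambda}\right)^{k-1} e^{-(x/\lambda)^k},\qquad
F(x) = 1 - e^{-(x/\lambda)^k},\qquad
F^{-1}(p) = \lambda\bigl(-\ln(1-p)\bigr)^{1/k},
```

```latex
\mathbb{E}[X] = \lambda\,\Gamma\!\left(1+\tfrac{1}{k}\right),\qquad
\operatorname{Var}[X] = \lambda^{2}\left[\Gamma\!\left(1+\tfrac{2}{k}\right)-\Gamma^{2}\!\left(1+\tfrac{1}{k}\right)\right],\qquad
\operatorname{mode} = \lambda\left(\tfrac{k-1}{k}\right)^{1/k}\ (k>1).
```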
- static const char* function = "boost::math::median(const weibull_distribution<%1%>)"; + constexpr auto function = "boost::math::median(const weibull_distribution<%1%>)"; RealType shape = dist.shape(); // Wikipedia k RealType scale = dist.scale(); // Wikipedia lambda @@ -404,11 +407,11 @@ inline RealType median(const weibull_distribution& dist) } template -inline RealType skewness(const weibull_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType skewness(const weibull_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::skewness(const weibull_distribution<%1%>)"; + constexpr auto function = "boost::math::skewness(const weibull_distribution<%1%>)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -429,11 +432,11 @@ inline RealType skewness(const weibull_distribution& dist) } template -inline RealType kurtosis_excess(const weibull_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const weibull_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::kurtosis_excess(const weibull_distribution<%1%>)"; + constexpr auto function = "boost::math::kurtosis_excess(const weibull_distribution<%1%>)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -457,15 +460,15 @@ inline RealType kurtosis_excess(const weibull_distribution& di } template -inline RealType kurtosis(const weibull_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const weibull_distribution& dist) { return kurtosis_excess(dist) + 3; } template -inline RealType entropy(const weibull_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType entropy(const weibull_distribution& dist) { - using std::log; + BOOST_MATH_STD_USING RealType k = dist.shape(); RealType lambda = dist.scale(); return constants::euler()*(1-1/k) + log(lambda/k) + 1; diff --git a/include/boost/math/policies/error_handling.hpp b/include/boost/math/policies/error_handling.hpp index 070266c7fe..0a22dffa7f 100644 --- a/include/boost/math/policies/error_handling.hpp +++ b/include/boost/math/policies/error_handling.hpp @@ -1,6 +1,6 @@ // Copyright John Maddock 2007. // Copyright Paul A. Bristow 2007. - +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -9,6 +9,15 @@ #define BOOST_MATH_POLICY_ERROR_HANDLING_HPP #include +#include +#include +#include +#include +#include +#include + +#ifndef BOOST_MATH_HAS_NVRTC + #include #include #include @@ -19,8 +28,6 @@ #include #include #include -#include -#include #ifndef BOOST_MATH_NO_EXCEPTIONS #include #include @@ -199,7 +206,7 @@ void raise_error(const char* pfunction, const char* pmessage, const T& val) #endif template -inline T raise_domain_error( +BOOST_MATH_GPU_ENABLED inline T raise_domain_error( const char* function, const char* message, const T& val, @@ -210,12 +217,12 @@ inline T raise_domain_error( #else raise_error(function, message, val); // we never get here: - return std::numeric_limits::quiet_NaN(); + return boost::math::numeric_limits::quiet_NaN(); #endif } template -inline constexpr T raise_domain_error( +BOOST_MATH_GPU_ENABLED constexpr T raise_domain_error( const char* , const char* , const T& , @@ -223,11 +230,11 @@ inline constexpr T raise_domain_error( { // This may or may not do the right thing, but the user asked for the error // to be ignored so here we go anyway: - return std::numeric_limits::quiet_NaN(); + return boost::math::numeric_limits::quiet_NaN(); } template -inline T raise_domain_error( +BOOST_MATH_GPU_ENABLED inline T raise_domain_error( const char* , const char* , const T& , @@ -236,11 +243,11 @@ inline T raise_domain_error( errno = EDOM; // This may or may not do the right thing, but the user asked for the error // to be silent so here we go anyway: - return std::numeric_limits::quiet_NaN(); + return boost::math::numeric_limits::quiet_NaN(); } template -inline T raise_domain_error( +BOOST_MATH_GPU_ENABLED inline T raise_domain_error( const char* function, const char* message, const T& val, @@ -250,7 +257,7 @@ inline T raise_domain_error( } template -inline T raise_pole_error( +BOOST_MATH_GPU_ENABLED inline T raise_pole_error( const char* function, const char* message, const T& val, @@ -264,7 +271,7 @@ inline T raise_pole_error( } template -inline constexpr T raise_pole_error( +BOOST_MATH_GPU_ENABLED constexpr T raise_pole_error( const char* function, const char* message, const T& val, @@ -274,7 +281,7 @@ inline constexpr T raise_pole_error( } template -inline constexpr T raise_pole_error( +BOOST_MATH_GPU_ENABLED constexpr T raise_pole_error( const char* function, const char* message, const T& val, @@ -284,7 +291,7 @@ inline constexpr T raise_pole_error( } template -inline T raise_pole_error( +BOOST_MATH_GPU_ENABLED inline T raise_pole_error( const char* function, const char* message, const T& val, @@ -294,7 +301,7 @@ inline T raise_pole_error( } template -inline T raise_overflow_error( +BOOST_MATH_GPU_ENABLED inline T raise_overflow_error( const char* function, const char* message, const ::boost::math::policies::overflow_error< ::boost::math::policies::throw_on_error>&) @@ -304,12 +311,12 @@ inline T raise_overflow_error( #else raise_error(function, message ? message : "numeric overflow"); // We should never get here: - return std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : boost::math::tools::max_value(); + return boost::math::numeric_limits::has_infinity ? 
boost::math::numeric_limits::infinity() : boost::math::tools::max_value(); #endif } template -inline T raise_overflow_error( +BOOST_MATH_GPU_ENABLED inline T raise_overflow_error( const char* function, const char* message, const T& val, @@ -320,23 +327,23 @@ inline T raise_overflow_error( #else raise_error(function, message ? message : "numeric overflow", val); // We should never get here: - return std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : boost::math::tools::max_value(); + return boost::math::numeric_limits::has_infinity ? boost::math::numeric_limits::infinity() : boost::math::tools::max_value(); #endif } template -inline constexpr T raise_overflow_error( +BOOST_MATH_GPU_ENABLED constexpr T raise_overflow_error( const char* , const char* , const ::boost::math::policies::overflow_error< ::boost::math::policies::ignore_error>&) BOOST_MATH_NOEXCEPT(T) { // This may or may not do the right thing, but the user asked for the error // to be ignored so here we go anyway: - return std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : boost::math::tools::max_value(); + return boost::math::numeric_limits::has_infinity ? boost::math::numeric_limits::infinity() : boost::math::tools::max_value(); } template -inline constexpr T raise_overflow_error( +BOOST_MATH_GPU_ENABLED constexpr T raise_overflow_error( const char* , const char* , const T&, @@ -344,11 +351,11 @@ inline constexpr T raise_overflow_error( { // This may or may not do the right thing, but the user asked for the error // to be ignored so here we go anyway: - return std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : boost::math::tools::max_value(); + return boost::math::numeric_limits::has_infinity ? boost::math::numeric_limits::infinity() : boost::math::tools::max_value(); } template -inline T raise_overflow_error( +BOOST_MATH_GPU_ENABLED inline T raise_overflow_error( const char* , const char* , const ::boost::math::policies::overflow_error< ::boost::math::policies::errno_on_error>&) BOOST_MATH_NOEXCEPT(T) @@ -356,11 +363,11 @@ inline T raise_overflow_error( errno = ERANGE; // This may or may not do the right thing, but the user asked for the error // to be silent so here we go anyway: - return std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : boost::math::tools::max_value(); + return boost::math::numeric_limits::has_infinity ? boost::math::numeric_limits::infinity() : boost::math::tools::max_value(); } template -inline T raise_overflow_error( +BOOST_MATH_GPU_ENABLED inline T raise_overflow_error( const char* , const char* , const T&, @@ -369,20 +376,20 @@ inline T raise_overflow_error( errno = ERANGE; // This may or may not do the right thing, but the user asked for the error // to be silent so here we go anyway: - return std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : boost::math::tools::max_value(); + return boost::math::numeric_limits::has_infinity ? 
boost::math::numeric_limits::infinity() : boost::math::tools::max_value(); } template -inline T raise_overflow_error( +BOOST_MATH_GPU_ENABLED inline T raise_overflow_error( const char* function, const char* message, const ::boost::math::policies::overflow_error< ::boost::math::policies::user_error>&) { - return user_overflow_error(function, message, std::numeric_limits::infinity()); + return user_overflow_error(function, message, boost::math::numeric_limits::infinity()); } template -inline T raise_overflow_error( +BOOST_MATH_GPU_ENABLED inline T raise_overflow_error( const char* function, const char* message, const T& val, @@ -392,11 +399,11 @@ inline T raise_overflow_error( std::string sval = prec_format(val); replace_all_in_string(m, "%1%", sval.c_str()); - return user_overflow_error(function, m.c_str(), std::numeric_limits::infinity()); + return user_overflow_error(function, m.c_str(), boost::math::numeric_limits::infinity()); } template -inline T raise_underflow_error( +BOOST_MATH_GPU_ENABLED inline T raise_underflow_error( const char* function, const char* message, const ::boost::math::policies::underflow_error< ::boost::math::policies::throw_on_error>&) @@ -411,7 +418,7 @@ inline T raise_underflow_error( } template -inline constexpr T raise_underflow_error( +BOOST_MATH_GPU_ENABLED constexpr T raise_underflow_error( const char* , const char* , const ::boost::math::policies::underflow_error< ::boost::math::policies::ignore_error>&) BOOST_MATH_NOEXCEPT(T) @@ -422,7 +429,7 @@ inline constexpr T raise_underflow_error( } template -inline T raise_underflow_error( +BOOST_MATH_GPU_ENABLED inline T raise_underflow_error( const char* /* function */, const char* /* message */, const ::boost::math::policies::underflow_error< ::boost::math::policies::errno_on_error>&) BOOST_MATH_NOEXCEPT(T) @@ -434,7 +441,7 @@ inline T raise_underflow_error( } template -inline T raise_underflow_error( +BOOST_MATH_GPU_ENABLED inline T raise_underflow_error( const char* function, const char* message, const ::boost::math::policies::underflow_error< ::boost::math::policies::user_error>&) @@ -443,7 +450,7 @@ inline T raise_underflow_error( } template -inline T raise_denorm_error( +BOOST_MATH_GPU_ENABLED inline T raise_denorm_error( const char* function, const char* message, const T& /* val */, @@ -459,7 +466,7 @@ inline T raise_denorm_error( } template -inline constexpr T raise_denorm_error( +BOOST_MATH_GPU_ENABLED inline constexpr T raise_denorm_error( const char* , const char* , const T& val, @@ -471,7 +478,7 @@ inline constexpr T raise_denorm_error( } template -inline T raise_denorm_error( +BOOST_MATH_GPU_ENABLED inline T raise_denorm_error( const char* , const char* , const T& val, @@ -484,7 +491,7 @@ inline T raise_denorm_error( } template -inline T raise_denorm_error( +BOOST_MATH_GPU_ENABLED inline T raise_denorm_error( const char* function, const char* message, const T& val, @@ -494,7 +501,7 @@ inline T raise_denorm_error( } template -inline T raise_evaluation_error( +BOOST_MATH_GPU_ENABLED inline T raise_evaluation_error( const char* function, const char* message, const T& val, @@ -510,7 +517,7 @@ inline T raise_evaluation_error( } template -inline constexpr T raise_evaluation_error( +BOOST_MATH_GPU_ENABLED constexpr T raise_evaluation_error( const char* , const char* , const T& val, @@ -522,7 +529,7 @@ inline constexpr T raise_evaluation_error( } template -inline T raise_evaluation_error( +BOOST_MATH_GPU_ENABLED inline T raise_evaluation_error( const char* , const char* , const T& val, @@ -535,7 +542,7 @@ 
inline T raise_evaluation_error( } template -inline T raise_evaluation_error( +BOOST_MATH_GPU_ENABLED inline T raise_evaluation_error( const char* function, const char* message, const T& val, @@ -545,7 +552,7 @@ inline T raise_evaluation_error( } template -inline TargetType raise_rounding_error( +BOOST_MATH_GPU_ENABLED inline TargetType raise_rounding_error( const char* function, const char* message, const T& val, @@ -562,7 +569,7 @@ inline TargetType raise_rounding_error( } template -inline constexpr TargetType raise_rounding_error( +BOOST_MATH_GPU_ENABLED constexpr TargetType raise_rounding_error( const char* , const char* , const T& val, @@ -571,12 +578,12 @@ inline constexpr TargetType raise_rounding_error( { // This may or may not do the right thing, but the user asked for the error // to be ignored so here we go anyway: - static_assert(std::numeric_limits::is_specialized, "The target type must have std::numeric_limits specialized."); - return val > 0 ? (std::numeric_limits::max)() : (std::numeric_limits::is_integer ? (std::numeric_limits::min)() : -(std::numeric_limits::max)()); + static_assert(boost::math::numeric_limits::is_specialized, "The target type must have std::numeric_limits specialized."); + return val > 0 ? (boost::math::numeric_limits::max)() : (boost::math::numeric_limits::is_integer ? (boost::math::numeric_limits::min)() : -(boost::math::numeric_limits::max)()); } template -inline TargetType raise_rounding_error( +BOOST_MATH_GPU_ENABLED inline TargetType raise_rounding_error( const char* , const char* , const T& val, @@ -586,11 +593,11 @@ inline TargetType raise_rounding_error( errno = ERANGE; // This may or may not do the right thing, but the user asked for the error // to be silent so here we go anyway: - static_assert(std::numeric_limits::is_specialized, "The target type must have std::numeric_limits specialized."); - return val > 0 ? (std::numeric_limits::max)() : (std::numeric_limits::is_integer ? (std::numeric_limits::min)() : -(std::numeric_limits::max)()); + static_assert(boost::math::numeric_limits::is_specialized, "The target type must have std::numeric_limits specialized."); + return val > 0 ? (boost::math::numeric_limits::max)() : (boost::math::numeric_limits::is_integer ? 
(boost::math::numeric_limits::min)() : -(boost::math::numeric_limits::max)()); } template -inline TargetType raise_rounding_error( +BOOST_MATH_GPU_ENABLED inline TargetType raise_rounding_error( const char* function, const char* message, const T& val, @@ -601,7 +608,7 @@ inline TargetType raise_rounding_error( } template -inline T raise_indeterminate_result_error( +BOOST_MATH_GPU_ENABLED inline T raise_indeterminate_result_error( const char* function, const char* message, const T& val, @@ -613,12 +620,12 @@ inline T raise_indeterminate_result_error( #else raise_error(function, message, val); // we never get here: - return std::numeric_limits::quiet_NaN(); + return boost::math::numeric_limits::quiet_NaN(); #endif } template -inline constexpr T raise_indeterminate_result_error( +BOOST_MATH_GPU_ENABLED inline constexpr T raise_indeterminate_result_error( const char* , const char* , const T& , @@ -631,7 +638,7 @@ inline constexpr T raise_indeterminate_result_error( } template -inline T raise_indeterminate_result_error( +BOOST_MATH_GPU_ENABLED inline T raise_indeterminate_result_error( const char* , const char* , const T& , @@ -645,7 +652,7 @@ inline T raise_indeterminate_result_error( } template -inline T raise_indeterminate_result_error( +BOOST_MATH_GPU_ENABLED inline T raise_indeterminate_result_error( const char* function, const char* message, const T& val, @@ -658,7 +665,7 @@ inline T raise_indeterminate_result_error( } // namespace detail template -inline constexpr T raise_domain_error(const char* function, const char* message, const T& val, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED constexpr T raise_domain_error(const char* function, const char* message, const T& val, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::domain_error_type policy_type; return detail::raise_domain_error( @@ -667,7 +674,7 @@ inline constexpr T raise_domain_error(const char* function, const char* message, } template -inline constexpr T raise_pole_error(const char* function, const char* message, const T& val, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED constexpr T raise_pole_error(const char* function, const char* message, const T& val, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::pole_error_type policy_type; return detail::raise_pole_error( @@ -676,7 +683,7 @@ inline constexpr T raise_pole_error(const char* function, const char* message, c } template -inline constexpr T raise_overflow_error(const char* function, const char* message, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED constexpr T raise_overflow_error(const char* function, const char* message, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::overflow_error_type policy_type; return detail::raise_overflow_error( @@ -685,7 +692,7 @@ inline constexpr T raise_overflow_error(const char* function, const char* messag } template -inline constexpr T raise_overflow_error(const char* function, const char* message, const T& val, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED constexpr T raise_overflow_error(const char* function, const char* message, const T& val, const Policy&) noexcept(is_noexcept_error_policy::value && 
BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::overflow_error_type policy_type; return detail::raise_overflow_error( @@ -694,7 +701,7 @@ inline constexpr T raise_overflow_error(const char* function, const char* messag } template -inline constexpr T raise_underflow_error(const char* function, const char* message, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED constexpr T raise_underflow_error(const char* function, const char* message, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::underflow_error_type policy_type; return detail::raise_underflow_error( @@ -703,7 +710,7 @@ inline constexpr T raise_underflow_error(const char* function, const char* messa } template -inline constexpr T raise_denorm_error(const char* function, const char* message, const T& val, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED constexpr T raise_denorm_error(const char* function, const char* message, const T& val, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::denorm_error_type policy_type; return detail::raise_denorm_error( @@ -713,7 +720,7 @@ inline constexpr T raise_denorm_error(const char* function, const char* message, } template -inline constexpr T raise_evaluation_error(const char* function, const char* message, const T& val, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED constexpr T raise_evaluation_error(const char* function, const char* message, const T& val, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::evaluation_error_type policy_type; return detail::raise_evaluation_error( @@ -722,7 +729,7 @@ inline constexpr T raise_evaluation_error(const char* function, const char* mess } template -inline constexpr TargetType raise_rounding_error(const char* function, const char* message, const T& val, const TargetType& t, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED constexpr TargetType raise_rounding_error(const char* function, const char* message, const T& val, const TargetType& t, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::rounding_error_type policy_type; return detail::raise_rounding_error( @@ -731,7 +738,7 @@ inline constexpr TargetType raise_rounding_error(const char* function, const cha } template -inline constexpr T raise_indeterminate_result_error(const char* function, const char* message, const T& val, const R& result, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED constexpr T raise_indeterminate_result_error(const char* function, const char* message, const T& val, const R& result, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::indeterminate_result_error_type policy_type; return detail::raise_indeterminate_result_error( @@ -746,7 +753,7 @@ namespace detail { template -BOOST_MATH_FORCEINLINE bool check_overflow(T val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE bool check_overflow(T val, R* result, const char* 
function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) { BOOST_MATH_STD_USING if(fabs(val) > tools::max_value()) @@ -758,7 +765,7 @@ BOOST_MATH_FORCEINLINE bool check_overflow(T val, R* result, const char* functio return false; } template -BOOST_MATH_FORCEINLINE bool check_overflow(std::complex val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE bool check_overflow(std::complex val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) { typedef typename R::value_type r_type; r_type re, im; @@ -768,7 +775,7 @@ BOOST_MATH_FORCEINLINE bool check_overflow(std::complex val, R* result, const return r; } template -BOOST_MATH_FORCEINLINE bool check_underflow(T val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE bool check_underflow(T val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) { if((val != 0) && (static_cast(val) == 0)) { @@ -778,7 +785,7 @@ BOOST_MATH_FORCEINLINE bool check_underflow(T val, R* result, const char* functi return false; } template -BOOST_MATH_FORCEINLINE bool check_underflow(std::complex val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE bool check_underflow(std::complex val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) { typedef typename R::value_type r_type; r_type re, im; @@ -788,7 +795,7 @@ BOOST_MATH_FORCEINLINE bool check_underflow(std::complex val, R* result, cons return r; } template -BOOST_MATH_FORCEINLINE bool check_denorm(T val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE bool check_denorm(T val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) { BOOST_MATH_STD_USING if((fabs(val) < static_cast(tools::min_value())) && (static_cast(val) != 0)) @@ -799,7 +806,7 @@ BOOST_MATH_FORCEINLINE bool check_denorm(T val, R* result, const char* function, return false; } template -BOOST_MATH_FORCEINLINE bool check_denorm(std::complex val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE bool check_denorm(std::complex val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) 
&& (Policy::value != user_error)) { typedef typename R::value_type r_type; r_type re, im; @@ -811,28 +818,28 @@ BOOST_MATH_FORCEINLINE bool check_denorm(std::complex val, R* result, const c // Default instantiations with ignore_error policy. template -BOOST_MATH_FORCEINLINE constexpr bool check_overflow(T /* val */, R* /* result */, const char* /* function */, const overflow_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE constexpr bool check_overflow(T /* val */, R* /* result */, const char* /* function */, const overflow_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) { return false; } template -BOOST_MATH_FORCEINLINE constexpr bool check_overflow(std::complex /* val */, R* /* result */, const char* /* function */, const overflow_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE constexpr bool check_overflow(std::complex /* val */, R* /* result */, const char* /* function */, const overflow_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) { return false; } template -BOOST_MATH_FORCEINLINE constexpr bool check_underflow(T /* val */, R* /* result */, const char* /* function */, const underflow_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE constexpr bool check_underflow(T /* val */, R* /* result */, const char* /* function */, const underflow_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) { return false; } template -BOOST_MATH_FORCEINLINE constexpr bool check_underflow(std::complex /* val */, R* /* result */, const char* /* function */, const underflow_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE constexpr bool check_underflow(std::complex /* val */, R* /* result */, const char* /* function */, const underflow_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) { return false; } template -BOOST_MATH_FORCEINLINE constexpr bool check_denorm(T /* val */, R* /* result*/, const char* /* function */, const denorm_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE constexpr bool check_denorm(T /* val */, R* /* result*/, const char* /* function */, const denorm_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) { return false; } template -BOOST_MATH_FORCEINLINE constexpr bool check_denorm(std::complex /* val */, R* /* result*/, const char* /* function */, const denorm_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE constexpr bool check_denorm(std::complex /* val */, R* /* result*/, const char* /* function */, const denorm_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) { return false; } } // namespace detail template -BOOST_MATH_FORCEINLINE R checked_narrowing_cast(T val, const char* function) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && is_noexcept_error_policy::value) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE R checked_narrowing_cast(T val, const char* function) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && is_noexcept_error_policy::value) { typedef typename Policy::overflow_error_type overflow_type; typedef typename Policy::underflow_error_type underflow_type; @@ -852,7 +859,7 @@ BOOST_MATH_FORCEINLINE R checked_narrowing_cast(T val, const char* function) noe } 
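All of the raise_* overloads above dispatch on the policy's error type; the user_error variants call into user-supplied hooks. A sketch of providing one (the hook name user_domain_error and the declare-before-including ordering follow the documented Boost.Math convention; the policy alias name is mine):

```cpp
#include <boost/math/policies/policy.hpp>
#include <iostream>
#include <limits>

// The hook must be visible before the distribution headers are included,
// per the Boost.Math error-handling documentation.
namespace boost { namespace math { namespace policies {
template <class T>
T user_domain_error(const char* function, const char* message, const T& /*val*/)
{
    std::cerr << "domain error in " << function << ": " << message << '\n';
    return std::numeric_limits<T>::quiet_NaN();
}
}}} // namespace boost::math::policies

#include <boost/math/distributions/students_t.hpp>

int main()
{
    using namespace boost::math;
    using user_policy = policies::policy<policies::domain_error<policies::user_error>>;

    // Invalid df: the user hook fires instead of a throw, and the
    // distribution is constructed with the NaN the hook returned.
    students_t_distribution<double, user_policy> t(-1.0);
    (void)t;
}
```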
@@ -852,7 +859,7 @@ BOOST_MATH_FORCEINLINE R checked_narrowing_cast(T val, const char* function) noe
 }
 template <class T, class Policy>
-inline void check_series_iterations(const char* function, std::uintmax_t max_iter, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(T) && is_noexcept_error_policy<Policy>::value)
+BOOST_MATH_GPU_ENABLED inline void check_series_iterations(const char* function, std::uintmax_t max_iter, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(T) && is_noexcept_error_policy<Policy>::value)
 {
    if(max_iter >= policies::get_max_series_iterations<Policy>())
       raise_evaluation_error<T>(
@@ -861,7 +868,7 @@ inline void check_series_iterations(const char* function, std::uintmax_t max_ite
 }
 
 template <class T, class Policy>
-inline void check_root_iterations(const char* function, std::uintmax_t max_iter, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(T) && is_noexcept_error_policy<Policy>::value)
+BOOST_MATH_GPU_ENABLED inline void check_root_iterations(const char* function, std::uintmax_t max_iter, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(T) && is_noexcept_error_policy<Policy>::value)
 {
    if(max_iter >= policies::get_max_root_iterations<Policy>())
       raise_evaluation_error<T>(
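For context (annotation, not part of the patch): the caps consulted here come straight from the policy's max_series_iterations_type / max_root_iterations_type, so the GPU marking does not alter the limits. A minimal sketch, assuming the stock configuration macros are untouched:

    #include <boost/math/policies/policy.hpp>

    // Defaults are BOOST_MATH_MAX_SERIES_ITERATION_POLICY (1000000)
    // and BOOST_MATH_MAX_ROOT_ITERATION_POLICY (200).
    constexpr unsigned long series_cap =
       boost::math::policies::get_max_series_iterations<boost::math::policies::policy<>>();
    constexpr unsigned long root_cap =
       boost::math::policies::get_max_root_iterations<boost::math::policies::policy<>>();
    static_assert(series_cap == 1000000UL, "stock series cap");
    static_assert(root_cap == 200UL, "stock root-finding cap");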
@@ -871,25 +878,169 @@
 } //namespace policies
 
-namespace detail{
+#ifdef _MSC_VER
+#  pragma warning(pop)
+#endif
+
+}} // namespaces boost/math
+
+#else // Special values for NVRTC
+
+namespace boost {
+namespace math {
+namespace policies {
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED constexpr T raise_domain_error(
+           const char* ,
+           const char* ,
+           const T& ,
+           const Policy&) BOOST_MATH_NOEXCEPT(T)
+{
+   // This may or may not do the right thing, but the user asked for the error
+   // to be ignored so here we go anyway:
+   return boost::math::numeric_limits<T>::quiet_NaN();
+}
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED constexpr T raise_pole_error(
+           const char* function,
+           const char* message,
+           const T& val,
+           const Policy&) BOOST_MATH_NOEXCEPT(T)
+{
+   return boost::math::numeric_limits<T>::quiet_NaN();
+}
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED constexpr T raise_overflow_error(
+           const char* ,
+           const char* ,
+           const Policy&) BOOST_MATH_NOEXCEPT(T)
+{
+   // This may or may not do the right thing, but the user asked for the error
+   // to be ignored so here we go anyway:
+   return boost::math::numeric_limits<T>::has_infinity ? boost::math::numeric_limits<T>::infinity() : (boost::math::numeric_limits<T>::max)();
+}
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED constexpr T raise_overflow_error(
+           const char* ,
+           const char* ,
+           const T&,
+           const Policy&) BOOST_MATH_NOEXCEPT(T)
+{
+   // This may or may not do the right thing, but the user asked for the error
+   // to be ignored so here we go anyway:
+   return boost::math::numeric_limits<T>::has_infinity ? boost::math::numeric_limits<T>::infinity() : (boost::math::numeric_limits<T>::max)();
+}
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED constexpr T raise_underflow_error(
+           const char* ,
+           const char* ,
+           const Policy&) BOOST_MATH_NOEXCEPT(T)
+{
+   // This may or may not do the right thing, but the user asked for the error
+   // to be ignored so here we go anyway:
+   return static_cast<T>(0);
+}
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED inline constexpr T raise_denorm_error(
+           const char* ,
+           const char* ,
+           const T& val,
+           const Policy&) BOOST_MATH_NOEXCEPT(T)
+{
+   // This may or may not do the right thing, but the user asked for the error
+   // to be ignored so here we go anyway:
+   return val;
+}
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED constexpr T raise_evaluation_error(
+           const char* ,
+           const char* ,
+           const T& val,
+           const Policy&) BOOST_MATH_NOEXCEPT(T)
+{
+   // This may or may not do the right thing, but the user asked for the error
+   // to be ignored so here we go anyway:
+   return val;
+}
+
+template <class T, class TargetType, class Policy>
+BOOST_MATH_GPU_ENABLED constexpr TargetType raise_rounding_error(
+           const char* ,
+           const char* ,
+           const T& val,
+           const TargetType&,
+           const Policy&) BOOST_MATH_NOEXCEPT(T)
+{
+   // This may or may not do the right thing, but the user asked for the error
+   // to be ignored so here we go anyway:
+   static_assert(boost::math::numeric_limits<TargetType>::is_specialized, "The target type must have std::numeric_limits specialized.");
+   return val > 0 ? (boost::math::numeric_limits<TargetType>::max)() : (boost::math::numeric_limits<TargetType>::is_integer ? (boost::math::numeric_limits<TargetType>::min)() : -(boost::math::numeric_limits<TargetType>::max)());
+}
+
+template <class T, class R, class Policy>
+BOOST_MATH_GPU_ENABLED inline constexpr T raise_indeterminate_result_error(
+           const char* ,
+           const char* ,
+           const T& ,
+           const R& result,
+           const Policy&) BOOST_MATH_NOEXCEPT(T)
+{
+   // This may or may not do the right thing, but the user asked for the error
+   // to be ignored so here we go anyway:
+   return result;
+}
+
+template <class R, class T, class Policy>
+BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE R checked_narrowing_cast(T val, const char* function) noexcept(boost::math::is_floating_point_v<R> && boost::math::is_floating_point_v<T>)
+{
+   // We only have ignore error policy so no reason to check
+   return static_cast<R>(val);
+}
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED inline void check_series_iterations(const char* function, boost::math::uintmax_t max_iter, const Policy& pol) noexcept(boost::math::is_floating_point_v<T>)
+{
+   if(max_iter >= policies::get_max_series_iterations<Policy>())
+      raise_evaluation_error<T>(
+         function,
+         "Series evaluation exceeded %1% iterations, giving up now.", static_cast<T>(static_cast<double>(max_iter)), pol);
+}
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED inline void check_root_iterations(const char* function, boost::math::uintmax_t max_iter, const Policy& pol) noexcept(boost::math::is_floating_point_v<T>)
+{
+   if(max_iter >= policies::get_max_root_iterations<Policy>())
+      raise_evaluation_error<T>(
+         function,
+         "Root finding evaluation exceeded %1% iterations, giving up now.", static_cast<T>(static_cast<double>(max_iter)), pol);
+}
+
+} // namespace policies
+} // namespace math
+} // namespace boost
+
+#endif // BOOST_MATH_HAS_NVRTC
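For context (annotation, not part of the patch): NVRTC-compiled device code has no exceptions, so the fallback handlers above make failures visible only through the returned value (quiet NaN for domain errors, infinity/max for overflow). Callers therefore test the result, along these lines. Illustrative CUDA sketch only; the kernel and the choice of boost::math::lgamma are hypothetical and not taken from the patch:

    __global__ void lgamma_kernel(const double* in, double* out, int n)
    {
       int i = blockIdx.x * blockDim.x + threadIdx.x;
       if (i < n)
       {
          // Assumes lgamma is built for device in this configuration.
          double r = boost::math::lgamma(in[i]);
          // raise_domain_error returned quiet_NaN; r != r is the NaN test.
          out[i] = (r != r) ? 0.0 : r;
       }
    }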
+namespace boost { namespace math { namespace detail {
+
 //
 // Simple helper function to assist in returning a pair from a single value,
 // that value usually comes from one of the error handlers above:
 //
 template <class T>
-std::pair<T, T> pair_from_single(const T& val) BOOST_MATH_NOEXCEPT(T)
+BOOST_MATH_GPU_ENABLED boost::math::pair<T, T> pair_from_single(const T& val) BOOST_MATH_NOEXCEPT(T)
 {
-   return std::make_pair(val, val);
+   return boost::math::make_pair(val, val);
 }
 
-}
-
-#ifdef _MSC_VER
-#  pragma warning(pop)
-#endif
-
-}} // namespaces boost/math
+}}} // boost::math::detail
 
 #endif // BOOST_MATH_POLICY_ERROR_HANDLING_HPP
diff --git a/include/boost/math/policies/policy.hpp b/include/boost/math/policies/policy.hpp
index eb09682e32..ec7b36f2d5 100644
--- a/include/boost/math/policies/policy.hpp
+++ b/include/boost/math/policies/policy.hpp
@@ -9,11 +9,9 @@
 #include <boost/math/tools/config.hpp>
 #include <boost/math/tools/mp.hpp>
-#include <limits>
-#include <type_traits>
-#include <cmath>
-#include <cstdint>
-#include <cstddef>
+#include <boost/math/tools/numeric_limits.hpp>
+#include <boost/math/tools/type_traits.hpp>
+#include <boost/math/tools/cstdint.hpp>
 
 namespace boost{ namespace math{
@@ -22,9 +20,9 @@ namespace mp = tools::meta_programming;
 namespace tools{
 
 template <class T>
-constexpr int digits(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept;
+BOOST_MATH_GPU_ENABLED constexpr int digits(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept;
 template <class T>
-constexpr T epsilon(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point<T>::value);
+BOOST_MATH_GPU_ENABLED constexpr T epsilon(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point<T>::value);
 
 }
@@ -33,6 +31,33 @@ namespace policies{
 //
 // Define macros for our default policies, if they're not defined already:
 //
+
+
+//
+// Generic support for GPUs
+//
+#ifdef BOOST_MATH_HAS_GPU_SUPPORT
+# ifndef BOOST_MATH_OVERFLOW_ERROR_POLICY
+#  define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+# endif
+# ifndef BOOST_MATH_PROMOTE_DOUBLE_POLICY
+#  define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+# endif
+# ifndef BOOST_MATH_DOMAIN_ERROR_POLICY
+#  define BOOST_MATH_DOMAIN_ERROR_POLICY ignore_error
+# endif
+# ifndef BOOST_MATH_POLE_ERROR_POLICY
+#  define BOOST_MATH_POLE_ERROR_POLICY ignore_error
+# endif
+# ifndef BOOST_MATH_EVALUATION_ERROR_POLICY
+#  define BOOST_MATH_EVALUATION_ERROR_POLICY ignore_error
+# endif
+# ifndef BOOST_MATH_ROUNDING_ERROR_POLICY
+#  define BOOST_MATH_ROUNDING_ERROR_POLICY ignore_error
+# endif
+#endif
+
+//
 // Special cases for exceptions disabled first:
 //
 #ifdef BOOST_MATH_NO_EXCEPTIONS
@@ -107,20 +132,20 @@ namespace policies{
 #define BOOST_MATH_META_INT(Type, name, Default) \
    template <Type N = Default> \
-   class name : public std::integral_constant<int, N>{}; \
+   class name : public boost::math::integral_constant<int, N>{}; \
    \
    namespace detail{ \
    template <Type N> \
-   char test_is_valid_arg(const name<N>* = nullptr); \
-   char test_is_default_arg(const name<Default>* = nullptr); \
+   BOOST_MATH_GPU_ENABLED char test_is_valid_arg(const name<N>* = nullptr); \
+   BOOST_MATH_GPU_ENABLED char test_is_default_arg(const name<Default>* = nullptr); \
    \
    template <typename T> \
    class is_##name##_imp \
    { \
    private: \
       template <Type N> \
-      static char test(const name<N>* = nullptr); \
-      static double test(...); \
+      BOOST_MATH_GPU_ENABLED static char test(const name<N>* = nullptr); \
+      BOOST_MATH_GPU_ENABLED static double test(...); \
    public: \
       static constexpr bool value = sizeof(test(static_cast<T*>(nullptr))) == sizeof(char); \
    }; \
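For context (annotation, not part of the patch): BOOST_MATH_META_INT, whose definition continues in the next hunk, stamps out each integer-valued policy tag (domain_error<>, pole_error<>, overflow_error<>, ...) together with an is_xxx detection trait, so the BOOST_MATH_GPU_ENABLED markings added here propagate to every policy class the macro generates. A sketch of a generated trait in use (the domain_error instantiation of the macro is assumed, it is not shown in this hunk):

    static_assert(boost::math::policies::is_domain_error<
                     boost::math::policies::domain_error<boost::math::policies::throw_on_error>>::value,
                  "domain_error<...> is recognised by its generated trait");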
@@ -131,27 +156,27 @@ namespace policies{
    { \
    public: \
       static constexpr bool value = boost::math::policies::detail::is_##name##_imp<T>::value; \
-      using type = std::integral_constant<bool, value>; \
+      using type = boost::math::integral_constant<bool, value>; \
    };
 
 #define BOOST_MATH_META_BOOL(name, Default) \
    template <bool N = Default> \
-   class name : public std::integral_constant<bool, N>{}; \
+   class name : public boost::math::integral_constant<bool, N>{}; \
    \
    namespace detail{ \
    template <bool N> \
-   char test_is_valid_arg(const name<N>* = nullptr); \
-   char test_is_default_arg(const name<Default>* = nullptr); \
+   BOOST_MATH_GPU_ENABLED char test_is_valid_arg(const name<N>* = nullptr); \
+   BOOST_MATH_GPU_ENABLED char test_is_default_arg(const name<Default>* = nullptr); \
    \
    template <typename T> \
    class is_##name##_imp \
    { \
    private: \
       template <bool N> \
-      static char test(const name<N>* = nullptr); \
-      static double test(...); \
+      BOOST_MATH_GPU_ENABLED static char test(const name<N>* = nullptr); \
+      BOOST_MATH_GPU_ENABLED static double test(...); \
    public: \
-      static constexpr bool value = sizeof(test(static_cast<T*>(nullptr))) == sizeof(char); \
+      static constexpr bool value = sizeof(test(static_cast<T*>(nullptr))) == sizeof(char); \
    }; \
    } \
    \
@@ -160,7 +185,7 @@ namespace policies{
    { \
    public: \
       static constexpr bool value = boost::math::policies::detail::is_##name##_imp<T>::value; \
-      using type = std::integral_constant<bool, value>; \
+      using type = boost::math::integral_constant<bool, value>; \
    };
 
 //
@@ -232,27 +257,27 @@ struct precision
    //
    // Now work out the precision:
    //
-   using digits2_type = typename std::conditional<
+   using digits2_type = typename boost::math::conditional<
       (Digits10::value == 0),
       digits2<0>,
       digits2<((Digits10::value + 1) * 1000L) / 301L>
    >::type;
 public:
 #ifdef BOOST_BORLANDC
-   using type = typename std::conditional<
+   using type = typename boost::math::conditional<
       (Digits2::value > ::boost::math::policies::detail::precision<Digits10, Digits2>::digits2_type::value),
       Digits2, digits2_type>::type;
 #else
-   using type = typename std::conditional<
+   using type = typename boost::math::conditional<
       (Digits2::value > digits2_type::value),
       Digits2, digits2_type>::type;
 #endif
 };
 
-double test_is_valid_arg(...);
-double test_is_default_arg(...);
-char test_is_valid_arg(const default_policy*);
-char test_is_default_arg(const default_policy*);
+BOOST_MATH_GPU_ENABLED double test_is_valid_arg(...);
+BOOST_MATH_GPU_ENABLED double test_is_default_arg(...);
+BOOST_MATH_GPU_ENABLED char test_is_valid_arg(const default_policy*);
+BOOST_MATH_GPU_ENABLED char test_is_default_arg(const default_policy*);
 
 template <class T>
 class is_valid_policy_imp
@@ -280,7 +305,7 @@ class is_default_policy
 {
 public:
    static constexpr bool value = boost::math::policies::detail::is_default_policy_imp<T>::value;
-   using type = std::integral_constant<bool, value>;
+   using type = boost::math::integral_constant<bool, value>;
 
    template <class V>
    struct apply
@@ -289,7 +314,7 @@ class is_default_policy
    };
 };
 
-template <class L, class T, std::size_t N>
+template <class L, class T, boost::math::size_t N>
 struct append_N
 {
    using type = typename append_N<mp::mp_push_back<L, T>, T, N-1>::type;
@@ -378,7 +403,7 @@ class policy
    //
    // Typelist of the arguments:
    //
    using arg_list = mp::mp_list<A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13>;
-   static constexpr std::size_t arg_list_size = mp::mp_size<arg_list>::value;
+   static constexpr boost::math::size_t arg_list_size = mp::mp_size<arg_list>::value;
 
    template <...>
    struct pick_arg
@@ -509,7 +534,7 @@ class normalise
 {
 private:
    using arg_list = mp::mp_list<A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13>;
-   static constexpr std::size_t arg_list_size = mp::mp_size<arg_list>::value;
+   static constexpr boost::math::size_t arg_list_size = mp::mp_size<arg_list>::value;
 
    template <...>
    struct pick_arg
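For context (annotation, not part of the patch): normalise<>, just touched above, is what lets make_policy and policy<> accept arguments in any order and slot them into fixed positions; the change to boost::math::size_t only swaps the counting type. A sketch of the invariant it provides:

    #include <boost/math/policies/policy.hpp>
    #include <type_traits>

    using namespace boost::math::policies;
    // Argument order does not matter once normalised:
    using p1 = normalise<policy<>, domain_error<errno_on_error>, promote_double<false>>::type;
    using p2 = normalise<policy<>, promote_double<false>, domain_error<errno_on_error>>::type;
    static_assert(std::is_same<p1, p2>::value, "normalise is order-insensitive");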
@@ -640,81 +665,81 @@ struct normalise
    using type = policy<...>;
 };
 
-inline constexpr policy<> make_policy() noexcept
+BOOST_MATH_GPU_ENABLED constexpr policy<> make_policy() noexcept
 { return {}; }
 
 template <class A1>
-inline constexpr typename normalise<policy<>, A1>::type make_policy(const A1&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1>::type make_policy(const A1&) noexcept
 {
    typedef typename normalise<policy<>, A1>::type result_type;
    return result_type();
 }
 
 template <class A1, class A2>
-inline constexpr typename normalise<policy<>, A1, A2>::type make_policy(const A1&, const A2&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1, A2>::type make_policy(const A1&, const A2&) noexcept
 {
    typedef typename normalise<policy<>, A1, A2>::type result_type;
    return result_type();
 }
 
 template <class A1, class A2, class A3>
-inline constexpr typename normalise<policy<>, A1, A2, A3>::type make_policy(const A1&, const A2&, const A3&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1, A2, A3>::type make_policy(const A1&, const A2&, const A3&) noexcept
 {
    typedef typename normalise<policy<>, A1, A2, A3>::type result_type;
    return result_type();
 }
 
 template <class A1, class A2, class A3, class A4>
-inline constexpr typename normalise<policy<>, A1, A2, A3, A4>::type make_policy(const A1&, const A2&, const A3&, const A4&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1, A2, A3, A4>::type make_policy(const A1&, const A2&, const A3&, const A4&) noexcept
 {
    typedef typename normalise<policy<>, A1, A2, A3, A4>::type result_type;
    return result_type();
 }
 
 template <class A1, class A2, class A3, class A4, class A5>
-inline constexpr typename normalise<policy<>, A1, A2, A3, A4, A5>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1, A2, A3, A4, A5>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&) noexcept
 {
    typedef typename normalise<policy<>, A1, A2, A3, A4, A5>::type result_type;
    return result_type();
 }
 
 template <class A1, class A2, class A3, class A4, class A5, class A6>
-inline constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&) noexcept
 {
    typedef typename normalise<policy<>, A1, A2, A3, A4, A5, A6>::type result_type;
    return result_type();
 }
 
 template <class A1, class A2, class A3, class A4, class A5, class A6, class A7>
-inline constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&, const A7&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&, const A7&) noexcept
 {
    typedef typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7>::type result_type;
    return result_type();
 }
 
 template <class A1, class A2, class A3, class A4, class A5, class A6, class A7, class A8>
-inline constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&, const A7&, const A8&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&, const A7&, const A8&) noexcept
 {
    typedef typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8>::type result_type;
    return result_type();
 }
 
 template <class A1, class A2, class A3, class A4, class A5, class A6, class A7, class A8, class A9>
-inline constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8, A9>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&, const A7&, const A8&, const A9&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8, A9>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&, const A7&, const A8&, const A9&) noexcept
 {
    typedef typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8, A9>::type result_type;
    return result_type();
 }
 
 template <class A1, class A2, class A3, class A4, class A5, class A6, class A7, class A8, class A9, class A10>
-inline constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&, const A7&, const A8&, const A9&, const A10&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&, const A7&, const A8&, const A9&, const A10&) noexcept
 {
    typedef typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10>::type result_type;
    return result_type();
 }
 
 template <class A1, class A2, class A3, class A4, class A5, class A6, class A7, class A8, class A9, class A10, class A11>
-inline constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&, const A7&, const A8&, const A9&, const A10&, const A11&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&, const A7&, const A8&, const A9&, const A10&, const A11&) noexcept
 {
    typedef typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11>::type result_type;
    return result_type();
@@ -732,47 +757,47 @@ struct evaluation
 
 template <class Policy>
 struct evaluation<float, Policy>
 {
-   using type = typename std::conditional<Policy::promote_float_type::value, double, float>::type;
+   using type = typename boost::math::conditional<Policy::promote_float_type::value, double, float>::type;
 };
 
 template <class Policy>
 struct evaluation<double, Policy>
 {
-   using type = typename std::conditional<Policy::promote_double_type::value, long double, double>::type;
+   using type = typename boost::math::conditional<Policy::promote_double_type::value, long double, double>::type;
 };
 
 template <class T, class Policy>
 struct precision
 {
-   static_assert((std::numeric_limits<T>::radix == 2) || ((std::numeric_limits<T>::is_specialized == 0) || (std::numeric_limits<T>::digits == 0)),
-   "(std::numeric_limits<T>::radix == 2) || ((std::numeric_limits<T>::is_specialized == 0) || (std::numeric_limits<T>::digits == 0))");
+   static_assert((boost::math::numeric_limits<T>::radix == 2) || ((boost::math::numeric_limits<T>::is_specialized == 0) || (boost::math::numeric_limits<T>::digits == 0)),
+   "(boost::math::numeric_limits<T>::radix == 2) || ((boost::math::numeric_limits<T>::is_specialized == 0) || (boost::math::numeric_limits<T>::digits == 0))");
 #ifndef BOOST_BORLANDC
    using precision_type = typename Policy::precision_type;
-   using type = typename std::conditional<
-      ((std::numeric_limits<T>::is_specialized == 0) || (std::numeric_limits<T>::digits == 0)),
+   using type = typename boost::math::conditional<
+      ((boost::math::numeric_limits<T>::is_specialized == 0) || (boost::math::numeric_limits<T>::digits == 0)),
       // Possibly unknown precision:
       precision_type,
-      typename std::conditional<
-         ((std::numeric_limits<T>::digits <= precision_type::value)
+      typename boost::math::conditional<
+         ((boost::math::numeric_limits<T>::digits <= precision_type::value)
          || (Policy::precision_type::value <= 0)),
         // Default case, full precision for RealType:
-        digits2< std::numeric_limits<T>::digits>,
+        digits2< boost::math::numeric_limits<T>::digits>,
        // User customised precision:
         precision_type
      >::type
   >::type;
 #else
   using precision_type = typename Policy::precision_type;
-   using digits_t = std::integral_constant<int, std::numeric_limits<T>::digits>;
-   using spec_t = std::integral_constant<bool, std::numeric_limits<T>::is_specialized>;
-   using type = typename std::conditional<
-      (spec_t::value == true std::true_type || digits_t::value == 0),
+   using digits_t = boost::math::integral_constant<int, boost::math::numeric_limits<T>::digits>;
+   using spec_t = boost::math::integral_constant<bool, boost::math::numeric_limits<T>::is_specialized>;
+   using type = typename boost::math::conditional<
+      (spec_t::value == true boost::math::true_type || digits_t::value == 0),
       // Possibly unknown precision:
       precision_type,
-      typename std::conditional<
+      typename boost::math::conditional<
        (digits_t::value <= precision_type::value || precision_type::value <= 0),
        // Default case, full precision for RealType:
-        digits2< std::numeric_limits<T>::digits>,
+        digits2< boost::math::numeric_limits<T>::digits>,
       // User customised precision:
        precision_type
     >::type
@@ -785,7 +810,7 @@ struct precision
 template <class Policy>
 struct precision<BOOST_MATH_FLOAT128_TYPE, Policy>
 {
-   typedef std::integral_constant<int, 113> type;
+   typedef boost::math::integral_constant<int, 113> type;
 };
 
 #endif
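For context (annotation, not part of the patch), a worked example of what the precision machinery above computes: the digits10 -> digits2 conversion is (D10 + 1) * 1000 / 301, because 1000/301 approximates log2(10). Requesting 15 decimal digits therefore yields (15 + 1) * 1000 / 301 = 53 binary digits, exactly an IEEE double:

    #include <boost/math/policies/policy.hpp>

    using pol15 = boost::math::policies::policy<boost::math::policies::digits10<15>>;
    static_assert(boost::math::policies::digits<double, pol15>() == 53,
                  "15 decimal digits == 53 bits for double");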
@@ -793,15 +818,15 @@ struct precision
 
 namespace detail{
 
 template <class T, class Policy>
-inline constexpr int digits_imp(std::true_type const&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr int digits_imp(boost::math::true_type const&) noexcept
 {
-   static_assert( std::numeric_limits<T>::is_specialized, "std::numeric_limits<T>::is_specialized");
+   static_assert( boost::math::numeric_limits<T>::is_specialized, "boost::math::numeric_limits<T>::is_specialized");
    typedef typename boost::math::policies::precision<T, Policy>::type p_t;
    return p_t::value;
 }
 
 template <class T, class Policy>
-inline constexpr int digits_imp(std::false_type const&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr int digits_imp(boost::math::false_type const&) noexcept
 {
    return tools::digits<T>();
 }
@@ -809,26 +834,26 @@ inline constexpr int digits_imp(std::false_type const&) noexcept
 
 } // namespace detail
 
 template <class T, class Policy>
-inline constexpr int digits(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept
+BOOST_MATH_GPU_ENABLED constexpr int digits(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept
 {
-   typedef std::integral_constant<bool, std::numeric_limits<T>::is_specialized > tag_type;
+   typedef boost::math::integral_constant<bool, boost::math::numeric_limits<T>::is_specialized > tag_type;
    return detail::digits_imp<T, Policy>(tag_type());
 }
 template <class T, class Policy>
-inline constexpr int digits_base10(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept
+BOOST_MATH_GPU_ENABLED constexpr int digits_base10(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept
 {
    return boost::math::policies::digits<T, Policy>() * 301 / 1000L;
 }
 
 template <class Policy>
-inline constexpr unsigned long get_max_series_iterations() noexcept
+BOOST_MATH_GPU_ENABLED constexpr unsigned long get_max_series_iterations() noexcept
 {
    typedef typename Policy::max_series_iterations_type iter_type;
    return iter_type::value;
 }
 
 template <class Policy>
-inline constexpr unsigned long get_max_root_iterations() noexcept
+BOOST_MATH_GPU_ENABLED constexpr unsigned long get_max_root_iterations() noexcept
 {
    typedef typename Policy::max_root_iterations_type iter_type;
    return iter_type::value;
@@ -839,51 +864,51 @@ namespace detail{
 
 template <class T, class Digits, class Small, class Default>
 struct series_factor_calc
 {
-   static T get() noexcept(std::is_floating_point<T>::value)
+   BOOST_MATH_GPU_ENABLED static T get() noexcept(boost::math::is_floating_point<T>::value)
    {
       return ldexp(T(1.0), 1 - Digits::value);
    }
 };
 template <class T, class Digits>
-struct series_factor_calc<T, Digits, std::true_type, std::true_type>
+struct series_factor_calc<T, Digits, boost::math::true_type, boost::math::true_type>
 {
-   static constexpr T get() noexcept(std::is_floating_point<T>::value)
+   BOOST_MATH_GPU_ENABLED static constexpr T get() noexcept(boost::math::is_floating_point<T>::value)
    {
       return boost::math::tools::epsilon<T>();
    }
 };
 template <class T, class Digits>
-struct series_factor_calc<T, Digits, std::true_type, std::false_type>
+struct series_factor_calc<T, Digits, boost::math::true_type, boost::math::false_type>
 {
-   static constexpr T get() noexcept(std::is_floating_point<T>::value)
+   BOOST_MATH_GPU_ENABLED static constexpr T get() noexcept(boost::math::is_floating_point<T>::value)
    {
-      return 1 / static_cast<T>(static_cast<std::uintmax_t>(1u) << (Digits::value - 1));
+      return 1 / static_cast<T>(static_cast<boost::math::uintmax_t>(1u) << (Digits::value - 1));
    }
 };
 template <class T, class Digits>
-struct series_factor_calc<T, Digits, std::false_type, std::true_type>
+struct series_factor_calc<T, Digits, boost::math::false_type, boost::math::true_type>
 {
-   static constexpr T get() noexcept(std::is_floating_point<T>::value)
+   BOOST_MATH_GPU_ENABLED static constexpr T get() noexcept(boost::math::is_floating_point<T>::value)
    {
      return boost::math::tools::epsilon<T>();
    }
 };
 
 template <class T, class Policy>
-inline constexpr T get_epsilon_imp(std::true_type const&) noexcept(std::is_floating_point<T>::value)
+BOOST_MATH_GPU_ENABLED constexpr T get_epsilon_imp(boost::math::true_type const&) noexcept(boost::math::is_floating_point<T>::value)
 {
-   static_assert(std::numeric_limits<T>::is_specialized, "std::numeric_limits<T>::is_specialized");
-   static_assert(std::numeric_limits<T>::radix == 2, "std::numeric_limits<T>::radix == 2");
+   static_assert(boost::math::numeric_limits<T>::is_specialized, "boost::math::numeric_limits<T>::is_specialized");
+   static_assert(boost::math::numeric_limits<T>::radix == 2, "boost::math::numeric_limits<T>::radix == 2");
    typedef typename boost::math::policies::precision<T, Policy>::type p_t;
-   typedef std::integral_constant<bool, p_t::value <= std::numeric_limits<std::uintmax_t>::digits> is_small_int;
-   typedef std::integral_constant<bool, p_t::value >= std::numeric_limits<T>::digits> is_default_value;
+   typedef boost::math::integral_constant<bool, p_t::value <= boost::math::numeric_limits<boost::math::uintmax_t>::digits> is_small_int;
+   typedef boost::math::integral_constant<bool, p_t::value >= boost::math::numeric_limits<T>::digits> is_default_value;
    return series_factor_calc<T, p_t, is_small_int, is_default_value>::get();
 }
 
 template <class T, class Policy>
-inline constexpr T get_epsilon_imp(std::false_type const&) noexcept(std::is_floating_point<T>::value)
+BOOST_MATH_GPU_ENABLED constexpr T get_epsilon_imp(boost::math::false_type const&) noexcept(boost::math::is_floating_point<T>::value)
 {
    return tools::epsilon<T>();
 }
@@ -891,9 +916,9 @@ inline constexpr T get_epsilon_imp(std::false_type const&) noexcept(std::is_floa
 
 } // namespace detail
 
 template <class T, class Policy>
-inline constexpr T get_epsilon(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point<T>::value)
+BOOST_MATH_GPU_ENABLED constexpr T get_epsilon(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point<T>::value)
 {
-   typedef std::integral_constant<bool, (std::numeric_limits<T>::is_specialized && (std::numeric_limits<T>::radix == 2)) > tag_type;
+   typedef boost::math::integral_constant<bool, (boost::math::numeric_limits<T>::is_specialized && (boost::math::numeric_limits<T>::radix == 2)) > tag_type;
    return detail::get_epsilon_imp<T, Policy>(tag_type());
 }
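For context (annotation, not part of the patch): get_epsilon<T, Policy>() above is the policy-aware epsilon, machine epsilon under the default policy and ldexp(1, 1 - digits) when a policy narrows the working precision. Sketch:

    #include <boost/math/policies/policy.hpp>
    #include <limits>

    using def = boost::math::policies::policy<>;
    static_assert(boost::math::policies::get_epsilon<double, def>()
                     == std::numeric_limits<double>::epsilon(), "default: machine epsilon");

    // Reducing the precision widens the tolerance: digits2<24> gives 2^(1-24).
    using pol24 = boost::math::policies::policy<boost::math::policies::digits2<24>>;
    static_assert(boost::math::policies::get_epsilon<double, pol24>() == 0x1p-23, "");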

@@ -910,8 +935,8 @@ namespace detail{
 
 template <class A1, class A2, class A3, class A4, class A5, class A6, class A7, class A8, class A9, class A10, class A11, class A12, class A13>
-char test_is_policy(const policy<A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13>*);
-double test_is_policy(...);
+BOOST_MATH_GPU_ENABLED char test_is_policy(const policy<A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13>*);
+BOOST_MATH_GPU_ENABLED double test_is_policy(...);
 
 template <class P>
 class is_policy_imp
@@ -927,7 +952,7 @@ class is_policy
 {
 public:
    static constexpr bool value = boost::math::policies::detail::is_policy_imp<P>::value;
-   using type = std::integral_constant<bool, value>;
+   using type = boost::math::integral_constant<bool, value>;
 };
 
 //
@@ -937,20 +962,20 @@ struct constructor_error_check
 template <class Policy>
 struct constructor_error_check
 {
    using domain_error_type = typename Policy::domain_error_type;
-   using type = typename std::conditional<
+   using type = typename boost::math::conditional<
       (domain_error_type::value == throw_on_error) || (domain_error_type::value == user_error) || (domain_error_type::value == errno_on_error),
-      std::true_type,
-      std::false_type>::type;
+      boost::math::true_type,
+      boost::math::false_type>::type;
 };
 
 template <class Policy>
 struct method_error_check
 {
    using domain_error_type = typename Policy::domain_error_type;
-   using type = typename std::conditional<
+   using type = typename boost::math::conditional<
       (domain_error_type::value == throw_on_error),
-      std::false_type,
-      std::true_type>::type;
+      boost::math::false_type,
+      boost::math::true_type>::type;
 };
 //
 // Does the Policy ever throw on error?
diff --git a/include/boost/math/quadrature/detail/exp_sinh_detail.hpp b/include/boost/math/quadrature/detail/exp_sinh_detail.hpp
index 2df07b6ecc..77f2fbf060 100644
--- a/include/boost/math/quadrature/detail/exp_sinh_detail.hpp
+++ b/include/boost/math/quadrature/detail/exp_sinh_detail.hpp
@@ -7,6 +7,10 @@
 #ifndef BOOST_MATH_QUADRATURE_DETAIL_EXP_SINH_DETAIL_HPP
 #define BOOST_MATH_QUADRATURE_DETAIL_EXP_SINH_DETAIL_HPP
 
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_HAS_NVRTC
+
 #include <...>
 #include <...>
 #include <...>
@@ -541,4 +545,1458 @@ void exp_sinh_detail<Real, Policy>::init(const std::integral_constant<int, ...>&)
 }
 }
 }
-#endif
+
+#endif // BOOST_MATH_HAS_NVRTC
+
+#ifdef BOOST_MATH_ENABLE_CUDA // BOOST_MATH_ENABLE_CUDA
+
+#include <...>
+#include <...>
+#include <...>
+#include <...>
+#include <...>
+
+namespace boost {
+namespace math {
+namespace quadrature {
+namespace detail {
+
+// In the CUDA case we break these down into a series of fixed size arrays and then make a pointer to the arrays
+// We can't use a 2D array because it takes up far too much memory that is primarily wasted space
+
+__constant__ float m_abscissas_float_1[9] =
+    { 3.47876573e-23f, 5.62503650e-09f, 9.95706124e-04f, 9.67438487e-02f, 7.43599217e-01f, 4.14293205e+00f,
+      1.08086768e+02f, 4.56291316e+05f, 2.70123007e+15f, };
+
+__constant__ float m_abscissas_float_2[8] =
+    { 2.41870864e-14f, 1.02534662e-05f, 1.65637566e-02f, 3.11290799e-01f, 1.64691269e+00f, 1.49800773e+01f,
+      2.57724301e+03f, 2.24833766e+09f, };
+
+__constant__ float m_abscissas_float_3[16] =
+    { 3.24983286e-18f, 2.51095186e-11f, 3.82035773e-07f, 1.33717837e-04f, 4.80260650e-03f, 4.41526928e-02f,
+      1.83045938e-01f, 4.91960276e-01f, 1.10322609e+00f, 2.53681744e+00f, 7.39791792e+00f, 3.59560256e+01f,
+      4.36061333e+02f, 2.49501460e+04f, 1.89216933e+07f, 1.03348694e+12f, };
+
+__constant__ float m_abscissas_float_4[33] =
+    { 1.51941172e-20f, 3.70201714e-16f, 9.67598102e-13f, 4.44773051e-10f, 5.28493928e-08f, 2.19158236e-06f,
+      4.00799258e-05f, 3.88011529e-04f, 2.29325538e-03f, 9.25182629e-03f, 2.78117501e-02f, 6.67553298e-02f,
+      1.35173168e-01f, 2.41374946e-01f, 3.94194704e-01f, 6.07196731e-01f, 9.06432514e-01f, 1.34481045e+00f,
+      2.03268444e+00f, 3.21243032e+00f, 5.46310949e+00f, 1.03365745e+01f, 2.26486752e+01f, 6.03727778e+01f,
+      2.08220266e+02f, 1.00431239e+03f, 7.47843388e+03f, 9.75279951e+04f, 2.61755592e+06f, 1.77776624e+08f,
+      3.98255346e+10f, 4.13443763e+13f, 3.07708133e+17f, };
+
+__constant__ float m_abscissas_float_5[66] =
+    { 7.99409438e-22f, 2.41624595e-19f, 3.73461321e-17f, 3.19397902e-15f, 1.62042378e-13f, 5.18579386e-12f,
+      1.10520072e-10f, 1.64548212e-09f, 1.78534009e-08f,
1.46529196e-07f, 9.40168786e-07f, 4.85507733e-06f, + 2.07038029e-05f, 7.45799409e-05f, 2.31536599e-04f, 6.30580368e-04f, 1.53035449e-03f, 3.35582040e-03f, + 6.73124842e-03f, 1.24856832e-02f, 2.16245309e-02f, 3.52720523e-02f, 5.45995171e-02f, 8.07587788e-02f, + 1.14840025e-01f, 1.57867103e-01f, 2.10837078e-01f, 2.74805391e-01f, 3.51015955e-01f, 4.41077540e-01f, + 5.47194016e-01f, 6.72466825e-01f, 8.21304567e-01f, 1.00000000e+00f, 1.21757511e+00f, 1.48706221e+00f, + 1.82750536e+00f, 2.26717507e+00f, 2.84887335e+00f, 3.63893880e+00f, 4.74299876e+00f, 6.33444194e+00f, + 8.70776542e+00f, 1.23825548e+01f, 1.83151803e+01f, 2.83510579e+01f, 4.62437776e+01f, 8.00917327e+01f, + 1.48560852e+02f, 2.97989725e+02f, 6.53443372e+02f, 1.58584068e+03f, 4.31897162e+03f, 1.34084311e+04f, + 4.83003053e+04f, 2.05969943e+05f, 1.06363880e+06f, 6.82457850e+06f, 5.60117371e+07f, 6.07724622e+08f, + 9.04813016e+09f, 1.92834507e+11f, 6.17122515e+12f, 3.13089095e+14f, 2.67765347e+16f, 4.13865153e+18f, }; + +__constant__ float m_abscissas_float_6[132] = + { 1.70893932e-22f, 3.56621447e-21f, 6.19138882e-20f, 9.04299298e-19f, 1.12287188e-17f, 1.19706303e-16f, + 1.10583090e-15f, 8.92931857e-15f, 6.35404710e-14f, 4.01527389e-13f, 2.26955738e-12f, 1.15522811e-11f, + 5.32913181e-11f, 2.24130967e-10f, 8.64254491e-10f, 3.07161058e-09f, 1.01117742e-08f, 3.09775637e-08f, + 8.87004371e-08f, 2.38368096e-07f, 6.03520392e-07f, 1.44488635e-06f, 3.28212299e-06f, 7.09655821e-06f, + 1.46494407e-05f, 2.89537394e-05f, 5.49357161e-05f, 1.00313252e-04f, 1.76700203e-04f, 3.00920507e-04f, + 4.96484845e-04f, 7.95150594e-04f, 1.23845781e-03f, 1.87911525e-03f, 2.78210510e-03f, 4.02538552e-03f, + 5.70009588e-03f, 7.91020800e-03f, 1.07716137e-02f, 1.44106884e-02f, 1.89624177e-02f, 2.45682104e-02f, + 3.13735515e-02f, 3.95256605e-02f, 4.91713196e-02f, 6.04550279e-02f, 7.35176150e-02f, 8.84954195e-02f, + 1.05520113e-01f, 1.24719213e-01f, 1.46217318e-01f, 1.70138063e-01f, 1.96606781e-01f, 2.25753880e-01f, + 2.57718900e-01f, 2.92655274e-01f, 3.30735809e-01f, 3.72158929e-01f, 4.17155794e-01f, 4.65998399e-01f, + 5.19008863e-01f, 5.76570161e-01f, 6.39138643e-01f, 7.07258781e-01f, 7.81580731e-01f, 8.62881450e-01f, + 9.52090320e-01f, 1.05032052e+00f, 1.15890775e+00f, 1.27945836e+00f, 1.41390963e+00f, 1.56460576e+00f, + 1.73439430e+00f, 1.92674937e+00f, 2.14593012e+00f, 2.39718593e+00f, 2.68702407e+00f, 3.02356133e+00f, + 3.41698950e+00f, 3.88019661e+00f, 4.42960272e+00f, 5.08629455e+00f, 5.87757956e+00f, 6.83913514e+00f, + 8.01801085e+00f, 9.47686632e+00f, 1.13000199e+01f, 1.36021823e+01f, 1.65412214e+01f, 2.03370584e+01f, + 2.53000199e+01f, 3.18739815e+01f, 4.07030054e+01f, 5.27358913e+01f, 6.93929374e+01f, 9.28366010e+01f, + 1.26418926e+02f, 1.75435645e+02f, 2.48423411e+02f, 3.59440052e+02f, 5.32165336e+02f, 8.07455844e+02f, + 1.25762341e+03f, 2.01416017e+03f, 3.32313676e+03f, 5.65930306e+03f, 9.96877263e+03f, 1.82030939e+04f, + 3.45378531e+04f, 6.82619916e+04f, 1.40913380e+05f, 3.04680844e+05f, 6.92095957e+05f, 1.65694484e+06f, + 4.19519229e+06f, 1.12739016e+07f, 3.22814282e+07f, 9.88946136e+07f, 3.25562103e+08f, 1.15706659e+09f, + 4.46167708e+09f, 1.87647826e+10f, 8.65629909e+10f, 4.40614549e+11f, 2.49049013e+12f, 1.57380011e+13f, + 1.11990629e+14f, 9.04297390e+14f, 8.35377903e+15f, 8.90573552e+16f, 1.10582857e+18f, 1.61514650e+19f, }; + +__constant__ float m_abscissas_float_7[263] = + { 7.75845008e-23f, 3.71846701e-22f, 1.69833677e-21f, 7.40284853e-21f, 3.08399399e-20f, 1.22962599e-19f, + 4.69855182e-19f, 1.72288020e-18f, 6.07012059e-18f, 
2.05742924e-17f, 6.71669437e-17f, 2.11441966e-16f, + 6.42566550e-16f, 1.88715605e-15f, 5.36188198e-15f, 1.47533056e-14f, 3.93507835e-14f, 1.01841667e-13f, + 2.55981752e-13f, 6.25453236e-13f, 1.48683211e-12f, 3.44173601e-12f, 7.76421789e-12f, 1.70831312e-11f, + 3.66877698e-11f, 7.69632540e-11f, 1.57822184e-10f, 3.16577320e-10f, 6.21604166e-10f, 1.19551931e-09f, + 2.25364361e-09f, 4.16647469e-09f, 7.55905964e-09f, 1.34658870e-08f, 2.35675936e-08f, 4.05458117e-08f, + 6.86052525e-08f, 1.14227960e-07f, 1.87243781e-07f, 3.02323521e-07f, 4.81026747e-07f, 7.54564302e-07f, + 1.16746531e-06f, 1.78236867e-06f, 2.68618781e-06f, 3.99792342e-06f, 5.87841837e-06f, 8.54236163e-06f, + 1.22728487e-05f, 1.74387947e-05f, 2.45154696e-05f, 3.41083807e-05f, 4.69806683e-05f, 6.40841007e-05f, + 8.65936597e-05f, 1.15945600e-04f, 1.53878746e-04f, 2.02478652e-04f, 2.64224143e-04f, 3.42035594e-04f, + 4.39324211e-04f, 5.60041454e-04f, 7.08727668e-04f, 8.90558896e-04f, 1.11139085e-03f, 1.37779898e-03f, + 1.69711358e-03f, 2.07744903e-03f, 2.52772622e-03f, 3.05768742e-03f, 3.67790298e-03f, 4.39976940e-03f, + 5.23549846e-03f, 6.19809738e-03f, 7.30134015e-03f, 8.55973022e-03f, 9.98845520e-03f, 1.16033342e-02f, + 1.34207587e-02f, 1.54576276e-02f, 1.77312787e-02f, 2.02594158e-02f, 2.30600348e-02f, 2.61513493e-02f, + 2.95517158e-02f, 3.32795626e-02f, 3.73533204e-02f, 4.17913590e-02f, 4.66119283e-02f, 5.18331072e-02f, + 5.74727595e-02f, 6.35484986e-02f, 7.00776615e-02f, 7.70772927e-02f, 8.45641386e-02f, 9.25546518e-02f, + 1.01065008e-01f, 1.10111132e-01f, 1.19708739e-01f, 1.29873379e-01f, 1.40620505e-01f, 1.51965539e-01f, + 1.63923958e-01f, 1.76511391e-01f, 1.89743720e-01f, 2.03637197e-01f, 2.18208574e-01f, 2.33475238e-01f, + 2.49455360e-01f, 2.66168055e-01f, 2.83633553e-01f, 3.01873381e-01f, 3.20910560e-01f, 3.40769809e-01f, + 3.61477772e-01f, 3.83063247e-01f, 4.05557445e-01f, 4.28994258e-01f, 4.53410546e-01f, 4.78846448e-01f, + 5.05345717e-01f, 5.32956079e-01f, 5.61729623e-01f, 5.91723220e-01f, 6.22998983e-01f, 6.55624768e-01f, + 6.89674714e-01f, 7.25229845e-01f, 7.62378724e-01f, 8.01218171e-01f, 8.41854062e-01f, 8.84402205e-01f, + 9.28989312e-01f, 9.75754080e-01f, 1.02484839e+00f, 1.07643865e+00f, 1.13070727e+00f, 1.18785434e+00f, + 1.24809950e+00f, 1.31168403e+00f, 1.37887320e+00f, 1.44995892e+00f, 1.52526270e+00f, 1.60513906e+00f, + 1.68997931e+00f, 1.78021589e+00f, 1.87632722e+00f, 1.97884333e+00f, 2.08835213e+00f, 2.20550671e+00f, + 2.33103353e+00f, 2.46574193e+00f, 2.61053497e+00f, 2.76642183e+00f, 2.93453226e+00f, 3.11613304e+00f, + 3.31264716e+00f, 3.52567596e+00f, 3.75702486e+00f, 4.00873326e+00f, 4.28310945e+00f, 4.58277134e+00f, + 4.91069419e+00f, 5.27026666e+00f, 5.66535674e+00f, 6.10038953e+00f, 6.58043928e+00f, 7.11133842e+00f, + 7.69980735e+00f, 8.35360902e+00f, 9.08173387e+00f, 9.89462150e+00f, 1.08044272e+01f, 1.18253437e+01f, + 1.29739897e+01f, 1.42698826e+01f, 1.57360130e+01f, 1.73995473e+01f, 1.92926887e+01f, 2.14537359e+01f, + 2.39283915e+01f, 2.67713817e+01f, 3.00484719e+01f, 3.38389827e+01f, 3.82389447e+01f, 4.33650689e+01f, + 4.93597649e+01f, 5.63975118e+01f, 6.46929803e+01f, 7.45114359e+01f, 8.61821250e+01f, 1.00115581e+02f, + 1.16826112e+02f, 1.36961158e+02f, 1.61339834e+02f, 1.91003781e+02f, 2.27284639e+02f, 2.71894067e+02f, + 3.27044548e+02f, 3.95612465e+02f, 4.81359585e+02f, 5.89235756e+02f, 7.25795284e+02f, 8.99773468e+02f, + 1.12289036e+03f, 1.41097920e+03f, 1.78558211e+03f, 2.27622329e+03f, 2.92367233e+03f, 3.78466551e+03f, + 4.93879227e+03f, 6.49862329e+03f, 8.62473434e+03f, 1.15481896e+04f, 
1.56044945e+04f, 2.12853507e+04f, + 2.93183077e+04f, 4.07905708e+04f, 5.73434125e+04f, 8.14806753e+04f, 1.17063646e+05f, 1.70113785e+05f, + 2.50129854e+05f, 3.72274789e+05f, 5.61051155e+05f, 8.56556497e+05f, 1.32526810e+06f, 2.07888648e+06f, + 3.30771485e+06f, 5.34063130e+06f, 8.75442405e+06f, 1.45761434e+07f, 2.46634599e+07f, 4.24311457e+07f, + 7.42617251e+07f, 1.32291588e+08f, 2.40011058e+08f, 4.43725882e+08f, 8.36456588e+08f, 1.60874083e+09f, + 3.15878598e+09f, 6.33624483e+09f, 1.29932136e+10f, 2.72570398e+10f, 5.85372779e+10f, 1.28795973e+11f, + 2.90551047e+11f, 6.72570892e+11f, 1.59884056e+12f, 3.90652847e+12f, 9.81916374e+12f, 2.54124546e+13f, + 6.77814197e+13f, 1.86501681e+14f, 5.29897885e+14f, 1.55625904e+15f, 4.72943011e+15f, 1.48882761e+16f, + 4.86043448e+16f, 1.64741373e+17f, 5.80423410e+17f, 2.12831536e+18f, 8.13255421e+18f, }; + +__constant__ float m_abscissas_float_8[527] = + { 5.20331508e-23f, 1.15324162e-22f, 2.52466875e-22f, 5.46028730e-22f, 1.16690465e-21f, 2.46458927e-21f, + 5.14543768e-21f, 1.06205431e-20f, 2.16767715e-20f, 4.37564009e-20f, 8.73699691e-20f, 1.72595588e-19f, + 3.37377643e-19f, 6.52669145e-19f, 1.24976973e-18f, 2.36916845e-18f, 4.44691383e-18f, 8.26580373e-18f, + 1.52174118e-17f, 2.77517606e-17f, 5.01415830e-17f, 8.97689232e-17f, 1.59270821e-16f, 2.80084735e-16f, + 4.88253693e-16f, 8.43846463e-16f, 1.44610939e-15f, 2.45762595e-15f, 4.14251017e-15f, 6.92627770e-15f, + 1.14889208e-14f, 1.89084205e-14f, 3.08802476e-14f, 5.00504297e-14f, 8.05169965e-14f, 1.28579121e-13f, + 2.03847833e-13f, 3.20880532e-13f, 5.01568631e-13f, 7.78600100e-13f, 1.20044498e-12f, 1.83848331e-12f, + 2.79712543e-12f, 4.22808302e-12f, 6.35035779e-12f, 9.47805307e-12f, 1.40588174e-11f, 2.07266430e-11f, + 3.03739182e-11f, 4.42491437e-11f, 6.40886341e-11f, 9.22929507e-11f, 1.32161843e-10f, 1.88205259e-10f, + 2.66552657e-10f, 3.75488615e-10f, 5.26149742e-10f, 7.33426418e-10f, 1.01712318e-09f, 1.40344387e-09f, + 1.92688222e-09f, 2.63261606e-09f, 3.57952343e-09f, 4.84396276e-09f, 6.52448685e-09f, 8.74769197e-09f, + 1.16754399e-08f, 1.55137320e-08f, 2.05235608e-08f, 2.70341184e-08f, 3.54587968e-08f, 4.63144836e-08f, + 6.02447248e-08f, 7.80474059e-08f, 1.00707687e-07f, 1.29437018e-07f, 1.65719157e-07f, 2.11364220e-07f, + 2.68571894e-07f, 3.40005066e-07f, 4.28875221e-07f, 5.39041105e-07f, 6.75122241e-07f, 8.42629031e-07f, + 1.04811127e-06f, 1.29932703e-06f, 1.60543396e-06f, 1.97720518e-06f, 2.42727196e-06f, 2.97039558e-06f, + 3.62377065e-06f, 4.40736236e-06f, 5.34428013e-06f, 6.46118994e-06f, 7.78876789e-06f, 9.36219733e-06f, + 1.12217116e-05f, 1.34131848e-05f, 1.59887725e-05f, 1.90076038e-05f, 2.25365270e-05f, 2.66509096e-05f, + 3.14354940e-05f, 3.69853096e-05f, 4.34066412e-05f, 5.08180543e-05f, 5.93514765e-05f, 6.91533342e-05f, + 8.03857429e-05f, 9.32277499e-05f, 1.07876627e-04f, 1.24549208e-04f, 1.43483273e-04f, 1.64938971e-04f, + 1.89200275e-04f, 2.16576471e-04f, 2.47403671e-04f, 2.82046341e-04f, 3.20898851e-04f, 3.64387021e-04f, + 4.12969671e-04f, 4.67140163e-04f, 5.27427922e-04f, 5.94399942e-04f, 6.68662248e-04f, 7.50861330e-04f, + 8.41685517e-04f, 9.41866302e-04f, 1.05217960e-03f, 1.17344692e-03f, 1.30653650e-03f, 1.45236427e-03f, + 1.61189482e-03f, 1.78614219e-03f, 1.97617055e-03f, 2.18309485e-03f, 2.40808123e-03f, 2.65234740e-03f, + 2.91716284e-03f, 3.20384886e-03f, 3.51377855e-03f, 3.84837661e-03f, 4.20911898e-03f, 4.59753235e-03f, + 5.01519359e-03f, 5.46372894e-03f, 5.94481312e-03f, 6.46016832e-03f, 7.01156301e-03f, 7.60081065e-03f, + 8.22976829e-03f, 8.90033499e-03f, 
9.61445021e-03f, 1.03740920e-02f, 1.11812753e-02f, 1.20380497e-02f, + 1.29464978e-02f, 1.39087327e-02f, 1.49268962e-02f, 1.60031562e-02f, 1.71397050e-02f, 1.83387564e-02f, + 1.96025436e-02f, 2.09333170e-02f, 2.23333419e-02f, 2.38048956e-02f, 2.53502659e-02f, 2.69717481e-02f, + 2.86716433e-02f, 3.04522558e-02f, 3.23158911e-02f, 3.42648538e-02f, 3.63014456e-02f, 3.84279634e-02f, + 4.06466974e-02f, 4.29599296e-02f, 4.53699317e-02f, 4.78789641e-02f, 5.04892744e-02f, 5.32030959e-02f, + 5.60226468e-02f, 5.89501290e-02f, 6.19877276e-02f, 6.51376099e-02f, 6.84019251e-02f, 7.17828036e-02f, + 7.52823576e-02f, 7.89026802e-02f, 8.26458461e-02f, 8.65139116e-02f, 9.05089155e-02f, 9.46328794e-02f, + 9.88878087e-02f, 1.03275694e-01f, 1.07798510e-01f, 1.12458223e-01f, 1.17256783e-01f, 1.22196135e-01f, + 1.27278214e-01f, 1.32504950e-01f, 1.37878272e-01f, 1.43400107e-01f, 1.49072382e-01f, 1.54897032e-01f, + 1.60875997e-01f, 1.67011231e-01f, 1.73304700e-01f, 1.79758387e-01f, 1.86374297e-01f, 1.93154462e-01f, + 2.00100939e-01f, 2.07215821e-01f, 2.14501238e-01f, 2.21959362e-01f, 2.29592410e-01f, 2.37402653e-01f, + 2.45392415e-01f, 2.53564085e-01f, 2.61920117e-01f, 2.70463037e-01f, 2.79195450e-01f, 2.88120044e-01f, + 2.97239599e-01f, 3.06556989e-01f, 3.16075193e-01f, 3.25797297e-01f, 3.35726506e-01f, 3.45866147e-01f, + 3.56219679e-01f, 3.66790698e-01f, 3.77582948e-01f, 3.88600328e-01f, 3.99846898e-01f, 4.11326892e-01f, + 4.23044723e-01f, 4.35004995e-01f, 4.47212512e-01f, 4.59672288e-01f, 4.72389556e-01f, 4.85369781e-01f, + 4.98618671e-01f, 5.12142186e-01f, 5.25946554e-01f, 5.40038281e-01f, 5.54424165e-01f, 5.69111309e-01f, + 5.84107138e-01f, 5.99419409e-01f, 6.15056232e-01f, 6.31026081e-01f, 6.47337815e-01f, 6.64000696e-01f, + 6.81024405e-01f, 6.98419060e-01f, 7.16195243e-01f, 7.34364016e-01f, 7.52936944e-01f, 7.71926120e-01f, + 7.91344191e-01f, 8.11204381e-01f, 8.31520518e-01f, 8.52307069e-01f, 8.73579162e-01f, 8.95352625e-01f, + 9.17644013e-01f, 9.40470650e-01f, 9.63850664e-01f, 9.87803022e-01f, 1.01234758e+00f, 1.03750512e+00f, + 1.06329740e+00f, 1.08974721e+00f, 1.11687839e+00f, 1.14471595e+00f, 1.17328606e+00f, 1.20261614e+00f, + 1.23273496e+00f, 1.26367264e+00f, 1.29546076e+00f, 1.32813247e+00f, 1.36172249e+00f, 1.39626730e+00f, + 1.43180514e+00f, 1.46837616e+00f, 1.50602252e+00f, 1.54478848e+00f, 1.58472055e+00f, 1.62586760e+00f, + 1.66828098e+00f, 1.71201469e+00f, 1.75712551e+00f, 1.80367319e+00f, 1.85172058e+00f, 1.90133388e+00f, + 1.95258276e+00f, 2.00554062e+00f, 2.06028484e+00f, 2.11689693e+00f, 2.17546288e+00f, 2.23607339e+00f, + 2.29882418e+00f, 2.36381627e+00f, 2.43115639e+00f, 2.50095725e+00f, 2.57333803e+00f, 2.64842468e+00f, + 2.72635049e+00f, 2.80725648e+00f, 2.89129193e+00f, 2.97861498e+00f, 3.06939317e+00f, 3.16380413e+00f, + 3.26203621e+00f, 3.36428929e+00f, 3.47077553e+00f, 3.58172026e+00f, 3.69736291e+00f, 3.81795798e+00f, + 3.94377618e+00f, 4.07510558e+00f, 4.21225285e+00f, 4.35554468e+00f, 4.50532923e+00f, 4.66197775e+00f, + 4.82588634e+00f, 4.99747780e+00f, 5.17720373e+00f, 5.36554672e+00f, 5.56302277e+00f, 5.77018396e+00f, + 5.98762126e+00f, 6.21596768e+00f, 6.45590164e+00f, 6.70815069e+00f, 6.97349551e+00f, 7.25277437e+00f, + 7.54688785e+00f, 7.85680417e+00f, 8.18356491e+00f, 8.52829128e+00f, 8.89219104e+00f, 9.27656603e+00f, + 9.68282047e+00f, 1.01124700e+01f, 1.05671518e+01f, 1.10486353e+01f, 1.15588347e+01f, 1.20998217e+01f, + 1.26738407e+01f, 1.32833247e+01f, 1.39309131e+01f, 1.46194716e+01f, 1.53521138e+01f, 1.61322255e+01f, + 1.69634913e+01f, 1.78499242e+01f, 1.87958987e+01f, 
1.98061868e+01f, 2.08859991e+01f, 2.20410294e+01f, + 2.32775056e+01f, 2.46022448e+01f, 2.60227166e+01f, 2.75471124e+01f, 2.91844234e+01f, 3.09445281e+01f, + 3.28382897e+01f, 3.48776660e+01f, 3.70758319e+01f, 3.94473180e+01f, 4.20081658e+01f, 4.47761023e+01f, + 4.77707378e+01f, 5.10137879e+01f, 5.45293247e+01f, 5.83440613e+01f, 6.24876734e+01f, 6.69931639e+01f, + 7.18972765e+01f, 7.72409663e+01f, 8.30699343e+01f, 8.94352364e+01f, 9.63939781e+01f, 1.04010108e+02f, + 1.12355322e+02f, 1.21510104e+02f, 1.31564914e+02f, 1.42621552e+02f, 1.54794728e+02f, 1.68213867e+02f, + 1.83025185e+02f, 1.99394097e+02f, 2.17507985e+02f, 2.37579409e+02f, 2.59849828e+02f, 2.84593917e+02f, + 3.12124587e+02f, 3.42798827e+02f, 3.77024517e+02f, 4.15268384e+02f, 4.58065302e+02f, 5.06029199e+02f, + 5.59865843e+02f, 6.20387872e+02f, 6.88532497e+02f, 7.65382367e+02f, 8.52190227e+02f, 9.50408087e+02f, + 1.06172182e+03f, 1.18809220e+03f, 1.33180384e+03f, 1.49552334e+03f, 1.68236894e+03f, 1.89599367e+03f, + 2.14068513e+03f, 2.42148533e+03f, 2.74433485e+03f, 3.11624675e+03f, 3.54551666e+03f, 4.04197722e+03f, + 4.61730674e+03f, 5.28540457e+03f, 6.06284853e+03f, 6.96945350e+03f, 8.02895513e+03f, 9.26984864e+03f, + 1.07264200e+04f, 1.24400169e+04f, 1.44606187e+04f, 1.68487805e+04f, 1.96780458e+04f, 2.30379493e+04f, + 2.70377620e+04f, 3.18111749e+04f, 3.75221715e+04f, 4.43724093e+04f, 5.26105241e+04f, 6.25438881e+04f, + 7.45535092e+04f, 8.91129656e+04f, 1.06812532e+05f, 1.28390012e+05f, 1.54770253e+05f, 1.87115940e+05f, + 2.26893075e+05f, 2.75955654e+05f, 3.36655497e+05f, 4.11985149e+05f, 5.05764405e+05f, 6.22884544e+05f, + 7.69629183e+05f, 9.54097173e+05f, 1.18676186e+06f, 1.48121324e+06f, 1.85514609e+06f, 2.33168052e+06f, + 2.94113264e+06f, 3.72339780e+06f, 4.73116974e+06f, 6.03430539e+06f, 7.72576515e+06f, 9.92972861e+06f, + 1.28127257e+07f, 1.65989637e+07f, 2.15915179e+07f, 2.82017465e+07f, 3.69902945e+07f, 4.87244884e+07f, + 6.44590226e+07f, 8.56498776e+07f, 1.14315868e+08f, 1.53268759e+08f, 2.06442545e+08f, 2.79366798e+08f, + 3.79850300e+08f, 5.18973079e+08f, 7.12532948e+08f, 9.83165083e+08f, 1.36346329e+09f, 1.90059962e+09f, + 2.66319659e+09f, 3.75160395e+09f, 5.31334782e+09f, 7.56648043e+09f, 1.08350637e+10f, 1.56033907e+10f, + 2.25993074e+10f, 3.29229832e+10f, 4.82470799e+10f, 7.11297379e+10f, 1.05506900e+11f, 1.57471442e+11f, + 2.36513804e+11f, 3.57509889e+11f, 5.43926613e+11f, 8.33024431e+11f, 1.28435637e+12f, 1.99374510e+12f, + 3.11642465e+12f, 4.90561997e+12f, 7.77731247e+12f, 1.24197380e+13f, 1.99798484e+13f, 3.23831600e+13f, + 5.28864904e+13f, 8.70403770e+13f, 1.44377694e+14f, 2.41399528e+14f, 4.06896744e+14f, 6.91510621e+14f, + 1.18504970e+15f, 2.04811559e+15f, 3.57034809e+15f, 6.27861398e+15f, 1.11397125e+16f, 1.99435267e+16f, + 3.60337498e+16f, 6.57141972e+16f, 1.20980371e+17f, 2.24875057e+17f, 4.22089025e+17f, 8.00147402e+17f, + 1.53216987e+18f, 2.96403754e+18f, 5.79389087e+18f, 1.14455803e+19f, 2.28537992e+19f, }; + +__constant__ float* m_abscissas_float[8] = { + m_abscissas_float_1, + m_abscissas_float_2, + m_abscissas_float_3, + m_abscissas_float_4, + m_abscissas_float_5, + m_abscissas_float_6, + m_abscissas_float_7, + m_abscissas_float_8, +}; + +__constant__ float m_weights_float_1[9] = + { 1.79979618e-21f, 1.07218106e-07f, 7.05786060e-03f, 2.72310168e-01f, 1.18863515e+00f, 8.77655464e+00f, + 5.33879432e+02f, 5.98892409e+06f, 9.60751551e+16f, }; + +__constant__ float m_weights_float_2[8] = + { 7.59287827e-13f, 1.18886775e-04f, 7.27332179e-02f, 6.09156795e-01f, 2.71431234e+00f, 4.68800805e+01f, + 
2.06437304e+04f, 4.85431236e+10f, }; + +__constant__ float m_weights_float_3[16] = + { 1.30963564e-16f, 6.14135316e-10f, 5.67743391e-06f, 1.21108690e-03f, 2.67259824e-02f, 1.54234107e-01f, + 4.23412860e-01f, 8.47913037e-01f, 1.73632925e+00f, 4.63203354e+00f, 1.88206826e+01f, 1.40643917e+02f, + 2.73736946e+03f, 2.55633252e+05f, 3.18438602e+08f, 2.86363931e+13f, }; + +__constant__ float m_weights_float_4[33] = + { 6.93769555e-19f, 1.31670336e-14f, 2.68107110e-11f, 9.60294960e-09f, 8.89417585e-07f, 2.87650015e-05f, + 4.10649371e-04f, 3.10797444e-03f, 1.43958814e-02f, 4.56980985e-02f, 1.08787148e-01f, 2.08910486e-01f, + 3.43887471e-01f, 5.11338439e-01f, 7.19769211e-01f, 1.00073403e+00f, 1.42660267e+00f, 2.14966467e+00f, + 3.50341221e+00f, 6.28632057e+00f, 1.26369961e+01f, 2.90949180e+01f, 7.91163114e+01f, 2.65103292e+02f, + 1.15872311e+03f, 7.11886439e+03f, 6.77324248e+04f, 1.13081650e+06f, 3.88995005e+07f, 3.38857764e+09f, + 9.74063570e+11f, 1.29789430e+15f, 1.24001927e+19f, }; + +__constant__ float m_weights_float_5[66] = + { 3.88541434e-20f, 1.03646493e-17f, 1.41388360e-15f, 1.06725054e-13f, 4.77908002e-12f, 1.34999345e-10f, + 2.53970414e-09f, 3.33804787e-08f, 3.19755978e-07f, 2.31724882e-06f, 1.31302324e-05f, 5.98917639e-05f, + 2.25650360e-04f, 7.18397083e-04f, 1.97196929e-03f, 4.75106406e-03f, 1.02072514e-02f, 1.98317011e-02f, + 3.52844239e-02f, 5.81350403e-02f, 8.95955146e-02f, 1.30335749e-01f, 1.80445384e-01f, 2.39557131e-01f, + 3.07102681e-01f, 3.82648608e-01f, 4.66260909e-01f, 5.58867257e-01f, 6.62616429e-01f, 7.81267733e-01f, + 9.20677638e-01f, 1.08949034e+00f, 1.30019425e+00f, 1.57079633e+00f, 1.92752387e+00f, 2.40924883e+00f, + 3.07485695e+00f, 4.01578082e+00f, 5.37784753e+00f, 7.40045071e+00f, 1.04890228e+01f, 1.53538346e+01f, + 2.32861156e+01f, 3.67307348e+01f, 6.05296516e+01f, 1.04761593e+02f, 1.91598840e+02f, 3.72918009e+02f, + 7.78738763e+02f, 1.76101294e+03f, 4.35837629e+03f, 1.19484066e+04f, 3.67841605e+04f, 1.29157756e+05f, + 5.26424122e+05f, 2.54082527e+06f, 1.48545930e+07f, 1.07925566e+08f, 1.00317513e+09f, 1.23283860e+10f, + 2.07922173e+11f, 5.01997049e+12f, 1.82006578e+14f, 1.04617001e+16f, 1.01373023e+18f, 1.77530238e+20f, }; + +__constant__ float m_weights_float_6[132] = + { 8.56958007e-21f, 1.68000718e-19f, 2.74008750e-18f, 3.75978801e-17f, 4.38589881e-16f, 4.39263787e-15f, + 3.81223973e-14f, 2.89198757e-13f, 1.93338859e-12f, 1.14783389e-11f, 6.09544349e-11f, 2.91499607e-10f, + 1.26339559e-09f, 4.99234840e-09f, 1.80872790e-08f, 6.03998541e-08f, 1.86829770e-07f, 5.37807971e-07f, + 1.44704121e-06f, 3.65421571e-06f, 8.69454276e-06f, 1.95621880e-05f, 4.17628758e-05f, 8.48713297e-05f, + 1.64680159e-04f, 3.05960283e-04f, 5.45748909e-04f, 9.36950301e-04f, 1.55189915e-03f, 2.48542560e-03f, + 3.85690505e-03f, 5.81079770e-03f, 8.51529070e-03f, 1.21588421e-02f, 1.69446644e-02f, 2.30834400e-02f, + 3.07847946e-02f, 4.02482241e-02f, 5.16542634e-02f, 6.51566792e-02f, 8.08763802e-02f, 9.88975757e-02f, + 1.19266512e-01f, 1.41992893e-01f, 1.67053901e-01f, 1.94400532e-01f, 2.23965873e-01f, 2.55674859e-01f, + 2.89455038e-01f, 3.25247905e-01f, 3.63020457e-01f, 4.02776696e-01f, 4.44568958e-01f, 4.88509042e-01f, + 5.34779290e-01f, 5.83643845e-01f, 6.35460497e-01f, 6.90693630e-01f, 7.49928915e-01f, 8.13890578e-01f, + 8.83462209e-01f, 9.59712352e-01f, 1.04392634e+00f, 1.13764623e+00f, 1.24272128e+00f, 1.36137177e+00f, + 1.49627028e+00f, 1.65064527e+00f, 1.82841374e+00f, 2.03435175e+00f, 2.27431458e+00f, 2.55552245e+00f, + 2.88693336e+00f, 3.27973254e+00f, 3.74797919e+00f, 4.30946679e+00f, 
4.98687594e+00f, 5.80933099e+00f, + 6.81451887e+00f, 8.05159726e+00f, 9.58522167e+00f, 1.15011733e+01f, 1.39143002e+01f, 1.69798351e+01f, + 2.09096993e+01f, 2.59962450e+01f, 3.26472377e+01f, 4.14380231e+01f, 5.31903193e+01f, 6.90928164e+01f, + 9.08883744e+01f, 1.21168895e+02f, 1.63847041e+02f, 2.24923217e+02f, 3.13754154e+02f, 4.45189215e+02f, + 6.43236850e+02f, 9.47484116e+02f, 1.42457583e+03f, 2.18920236e+03f, 3.44338342e+03f, 5.55184130e+03f, + 9.19045432e+03f, 1.56468513e+04f, 2.74471462e+04f, 4.97037777e+04f, 9.31107740e+04f, 1.80835335e+05f, + 3.64968793e+05f, 7.67360053e+05f, 1.68525439e+06f, 3.87686515e+06f, 9.37022570e+06f, 2.38705733e+07f, + 6.43128750e+07f, 1.83920179e+08f, 5.60444636e+08f, 1.82722217e+09f, 6.40182180e+09f, 2.42153053e+10f, + 9.93804949e+10f, 4.44863150e+11f, 2.18425069e+12f, 1.18337660e+13f, 7.11948688e+13f, 4.78870731e+14f, + 3.62710215e+15f, 3.11747341e+16f, 3.06542975e+17f, 3.47854955e+18f, 4.59768243e+19f, 7.14806140e+20f, }; + +__constant__ float m_weights_float_7[263] = + { 3.95175890e-21f, 1.83575349e-20f, 8.12661397e-20f, 3.43336935e-19f, 1.38634563e-18f, 5.35757029e-18f, + 1.98424944e-17f, 7.05221126e-17f, 2.40827550e-16f, 7.91175869e-16f, 2.50347754e-15f, 7.63871031e-15f, + 2.25003103e-14f, 6.40502166e-14f, 1.76389749e-13f, 4.70424252e-13f, 1.21618334e-12f, 3.05082685e-12f, + 7.43273471e-12f, 1.76028616e-11f, 4.05602375e-11f, 9.10055013e-11f, 1.98994391e-10f, 4.24390078e-10f, + 8.83436580e-10f, 1.79636925e-09f, 3.57059250e-09f, 6.94247187e-09f, 1.32133371e-08f, 2.46332536e-08f, + 4.50110843e-08f, 8.06630537e-08f, 1.41856144e-07f, 2.44958654e-07f, 4.15579069e-07f, 6.93056106e-07f, + 1.13675616e-06f, 1.83473665e-06f, 2.91544023e-06f, 4.56318858e-06f, 7.03833675e-06f, 1.07030190e-05f, + 1.60534529e-05f, 2.37597559e-05f, 3.47141604e-05f, 5.00883685e-05f, 7.14005734e-05f, 1.00592372e-04f, + 1.40115414e-04f, 1.93027181e-04f, 2.63094779e-04f, 3.54905080e-04f, 4.73978972e-04f, 6.26886955e-04f, + 8.21362793e-04f, 1.06641153e-03f, 1.37240787e-03f, 1.75118071e-03f, 2.21607971e-03f, 2.78201983e-03f, + 3.46550010e-03f, 4.28459361e-03f, 5.25890609e-03f, 6.40950150e-03f, 7.75879384e-03f, 9.33040551e-03f, + 1.11489935e-02f, 1.32400455e-02f, 1.56296499e-02f, 1.83442433e-02f, 2.14103400e-02f, 2.48542509e-02f, + 2.87017958e-02f, 3.29780164e-02f, 3.77068968e-02f, 4.29110964e-02f, 4.86117029e-02f, 5.48280093e-02f, + 6.15773214e-02f, 6.88747982e-02f, 7.67333308e-02f, 8.51634602e-02f, 9.41733378e-02f, 1.03768728e-01f, + 1.13953051e-01f, 1.24727473e-01f, 1.36091031e-01f, 1.48040798e-01f, 1.60572082e-01f, 1.73678660e-01f, + 1.87353038e-01f, 2.01586736e-01f, 2.16370598e-01f, 2.31695113e-01f, 2.47550758e-01f, 2.63928342e-01f, + 2.80819365e-01f, 2.98216379e-01f, 3.16113348e-01f, 3.34506011e-01f, 3.53392244e-01f, 3.72772414e-01f, + 3.92649735e-01f, 4.13030618e-01f, 4.33925021e-01f, 4.55346789e-01f, 4.77314001e-01f, 4.99849320e-01f, + 5.22980337e-01f, 5.46739932e-01f, 5.71166640e-01f, 5.96305036e-01f, 6.22206131e-01f, 6.48927802e-01f, + 6.76535247e-01f, 7.05101473e-01f, 7.34707835e-01f, 7.65444619e-01f, 7.97411688e-01f, 8.30719192e-01f, + 8.65488366e-01f, 9.01852407e-01f, 9.39957463e-01f, 9.79963735e-01f, 1.02204672e+00f, 1.06639858e+00f, + 1.11322974e+00f, 1.16277062e+00f, 1.21527359e+00f, 1.27101525e+00f, 1.33029891e+00f, 1.39345744e+00f, + 1.46085648e+00f, 1.53289803e+00f, 1.61002461e+00f, 1.69272386e+00f, 1.78153384e+00f, 1.87704900e+00f, + 1.97992701e+00f, 2.09089644e+00f, 2.21076567e+00f, 2.34043290e+00f, 2.48089770e+00f, 2.63327413e+00f, + 2.79880590e+00f, 2.97888368e+00f, 
3.17506505e+00f, 3.38909744e+00f, 3.62294469e+00f, 3.87881764e+00f, + 4.15920968e+00f, 4.46693789e+00f, 4.80519096e+00f, 5.17758497e+00f, 5.58822853e+00f, 6.04179895e+00f, + 6.54363157e+00f, 7.09982467e+00f, 7.71736306e+00f, 8.40426388e+00f, 9.16974906e+00f, 1.00244499e+01f, + 1.09806502e+01f, 1.20525758e+01f, 1.32567410e+01f, 1.46123627e+01f, 1.61418586e+01f, 1.78714466e+01f, + 1.98318690e+01f, 2.20592694e+01f, 2.45962577e+01f, 2.74932084e+01f, 3.08098460e+01f, 3.46171893e+01f, + 3.89999428e+01f, 4.40594471e+01f, 4.99173320e+01f, 5.67200545e+01f, 6.46445583e+01f, 7.39053537e+01f, + 8.47634121e+01f, 9.75373786e+01f, 1.12617765e+02f, 1.30484989e+02f, 1.51732386e+02f, 1.77095712e+02f, + 2.07491096e+02f, 2.44064119e+02f, 2.88253545e+02f, 3.41874461e+02f, 4.07227291e+02f, 4.87241400e+02f, + 5.85665251e+02f, 7.07319497e+02f, 8.58435639e+02f, 1.04711167e+03f, 1.28392853e+03f, 1.58278901e+03f, + 1.96206607e+03f, 2.44618436e+03f, 3.06781187e+03f, 3.87091688e+03f, 4.91505977e+03f, 6.28145970e+03f, + 8.08162997e+03f, 1.04697579e+04f, 1.36605846e+04f, 1.79554230e+04f, 2.37803156e+04f, 3.17424455e+04f, + 4.27142204e+04f, 5.79596727e+04f, 7.93261335e+04f, 1.09537503e+05f, 1.52647130e+05f, 2.14743829e+05f, + 3.05063335e+05f, 4.37755687e+05f, 6.34724899e+05f, 9.30240305e+05f, 1.37850753e+06f, 2.06623977e+06f, + 3.13377596e+06f, 4.81098405e+06f, 7.47905793e+06f, 1.17782423e+07f, 1.87980927e+07f, 3.04180655e+07f, + 4.99257437e+07f, 8.31551852e+07f, 1.40614107e+08f, 2.41519712e+08f, 4.21576502e+08f, 7.48209440e+08f, + 1.35089892e+09f, 2.48263348e+09f, 4.64662007e+09f, 8.86235204e+09f, 1.72348930e+10f, 3.41967381e+10f, + 6.92714904e+10f, 1.43352142e+11f, 3.03269524e+11f, 6.56345865e+11f, 1.45422052e+12f, 3.30099910e+12f, + 7.68267630e+12f, 1.83474885e+13f, 4.49980389e+13f, 1.13430702e+14f, 2.94148450e+14f, 7.85402504e+14f, + 2.16127995e+15f, 6.13534293e+15f, 1.79847736e+16f, 5.44944507e+16f, 1.70858922e+17f, 5.54922744e+17f, + 1.86905990e+18f, 6.53599225e+18f, 2.37582887e+19f, 8.98810682e+19f, 3.54341330e+20f, }; + +__constant__ float m_weights_float_8[527] = + { 2.67108015e-21f, 5.82833463e-21f, 1.25616316e-20f, 2.67469785e-20f, 5.62745845e-20f, 1.17014394e-19f, + 2.40511019e-19f, 4.88739481e-19f, 9.82072303e-19f, 1.95168062e-18f, 3.83661097e-18f, 7.46163208e-18f, + 1.43594942e-17f, 2.73485792e-17f, 5.15573612e-17f, 9.62223075e-17f, 1.77810682e-16f, 3.25389618e-16f, + 5.89765054e-16f, 1.05888451e-15f, 1.88354538e-15f, 3.31989417e-15f, 5.79902273e-15f, 1.00398818e-14f, + 1.72308010e-14f, 2.93186753e-14f, 4.94655967e-14f, 8.27635884e-14f, 1.37343706e-13f, 2.26082511e-13f, + 3.69205736e-13f, 5.98228147e-13f, 9.61866975e-13f, 1.53484658e-12f, 2.43090464e-12f, 3.82185577e-12f, + 5.96531965e-12f, 9.24474797e-12f, 1.42267754e-11f, 2.17427910e-11f, 3.30041201e-11f, 4.97635091e-11f, + 7.45399354e-11f, 1.10929412e-10f, 1.64031748e-10f, 2.41032586e-10f, 3.51991946e-10f, 5.10905560e-10f, + 7.37124150e-10f, 1.05723929e-09f, 1.50757352e-09f, 2.13744796e-09f, 3.01344401e-09f, 4.22492806e-09f, + 5.89117093e-09f, 8.17046854e-09f, 1.12717587e-08f, 1.54693324e-08f, 2.11213594e-08f, 2.86930859e-08f, + 3.87857241e-08f, 5.21722335e-08f, 6.98414017e-08f, 9.30518593e-08f, 1.23397923e-07f, 1.62889442e-07f, + 2.14048123e-07f, 2.80023159e-07f, 3.64729321e-07f, 4.73011070e-07f, 6.10836627e-07f, 7.85526363e-07f, + 1.00602028e-06f, 1.28318979e-06f, 1.63019938e-06f, 2.06292424e-06f, 2.60043021e-06f, 3.26552286e-06f, + 4.08537275e-06f, 5.09222413e-06f, 6.32419483e-06f, 7.82617466e-06f, 9.65083023e-06f, 1.18597236e-05f, + 1.45245521e-05f, 
1.77285168e-05f, 2.15678251e-05f, 2.61533347e-05f, 3.16123436e-05f, 3.80905295e-05f, + 4.57540432e-05f, 5.47917575e-05f, 6.54176707e-05f, 7.78734661e-05f, 9.24312223e-05f, 1.09396271e-04f, + 1.29110197e-04f, 1.51953965e-04f, 1.78351176e-04f, 2.08771424e-04f, 2.43733750e-04f, 2.83810168e-04f, + 3.29629253e-04f, 3.81879756e-04f, 4.41314233e-04f, 5.08752659e-04f, 5.85085996e-04f, 6.71279692e-04f, + 7.68377076e-04f, 8.77502620e-04f, 9.99865030e-04f, 1.13676015e-03f, 1.28957360e-03f, 1.45978322e-03f, + 1.64896113e-03f, 1.85877551e-03f, 2.09099200e-03f, 2.34747474e-03f, 2.63018699e-03f, 2.94119122e-03f, + 3.28264890e-03f, 3.65681963e-03f, 4.06605991e-03f, 4.51282135e-03f, 4.99964828e-03f, 5.52917497e-03f, + 6.10412222e-03f, 6.72729343e-03f, 7.40157020e-03f, 8.12990738e-03f, 8.91532760e-03f, 9.76091537e-03f, + 1.06698107e-02f, 1.16452023e-02f, 1.26903202e-02f, 1.38084285e-02f, 1.50028172e-02f, 1.62767940e-02f, + 1.76336759e-02f, 1.90767806e-02f, 2.06094173e-02f, 2.22348784e-02f, 2.39564300e-02f, 2.57773028e-02f, + 2.77006834e-02f, 2.97297055e-02f, 3.18674406e-02f, 3.41168899e-02f, 3.64809756e-02f, 3.89625331e-02f, + 4.15643030e-02f, 4.42889240e-02f, 4.71389254e-02f, 5.01167213e-02f, 5.32246039e-02f, 5.64647382e-02f, + 5.98391571e-02f, 6.33497571e-02f, 6.69982939e-02f, 7.07863800e-02f, 7.47154815e-02f, 7.87869165e-02f, + 8.30018539e-02f, 8.73613125e-02f, 9.18661613e-02f, 9.65171203e-02f, 1.01314762e-01f, 1.06259513e-01f, + 1.11351656e-01f, 1.16591337e-01f, 1.21978563e-01f, 1.27513213e-01f, 1.33195039e-01f, 1.39023671e-01f, + 1.44998628e-01f, 1.51119321e-01f, 1.57385061e-01f, 1.63795066e-01f, 1.70348473e-01f, 1.77044340e-01f, + 1.83881662e-01f, 1.90859375e-01f, 1.97976367e-01f, 2.05231492e-01f, 2.12623572e-01f, 2.20151415e-01f, + 2.27813822e-01f, 2.35609599e-01f, 2.43537565e-01f, 2.51596569e-01f, 2.59785494e-01f, 2.68103274e-01f, + 2.76548903e-01f, 2.85121445e-01f, 2.93820047e-01f, 3.02643950e-01f, 3.11592502e-01f, 3.20665165e-01f, + 3.29861530e-01f, 3.39181328e-01f, 3.48624439e-01f, 3.58190905e-01f, 3.67880941e-01f, 3.77694943e-01f, + 3.87633504e-01f, 3.97697421e-01f, 4.07887708e-01f, 4.18205605e-01f, 4.28652591e-01f, 4.39230391e-01f, + 4.49940993e-01f, 4.60786652e-01f, 4.71769905e-01f, 4.82893580e-01f, 4.94160809e-01f, 5.05575036e-01f, + 5.17140031e-01f, 5.28859900e-01f, 5.40739096e-01f, 5.52782432e-01f, 5.64995090e-01f, 5.77382639e-01f, + 5.89951040e-01f, 6.02706666e-01f, 6.15656310e-01f, 6.28807202e-01f, 6.42167019e-01f, 6.55743908e-01f, + 6.69546490e-01f, 6.83583887e-01f, 6.97865729e-01f, 7.12402181e-01f, 7.27203953e-01f, 7.42282322e-01f, + 7.57649155e-01f, 7.73316926e-01f, 7.89298740e-01f, 8.05608358e-01f, 8.22260217e-01f, 8.39269463e-01f, + 8.56651970e-01f, 8.74424378e-01f, 8.92604116e-01f, 9.11209442e-01f, 9.30259469e-01f, 9.49774208e-01f, + 9.69774604e-01f, 9.90282579e-01f, 1.01132107e+00f, 1.03291408e+00f, 1.05508673e+00f, 1.07786529e+00f, + 1.10127728e+00f, 1.12535146e+00f, 1.15011796e+00f, 1.17560829e+00f, 1.20185546e+00f, 1.22889400e+00f, + 1.25676010e+00f, 1.28549162e+00f, 1.31512826e+00f, 1.34571158e+00f, 1.37728514e+00f, 1.40989460e+00f, + 1.44358784e+00f, 1.47841507e+00f, 1.51442894e+00f, 1.55168471e+00f, 1.59024039e+00f, 1.63015687e+00f, + 1.67149810e+00f, 1.71433126e+00f, 1.75872698e+00f, 1.80475947e+00f, 1.85250679e+00f, 1.90205105e+00f, + 1.95347869e+00f, 2.00688065e+00f, 2.06235275e+00f, 2.11999592e+00f, 2.17991652e+00f, 2.24222670e+00f, + 2.30704472e+00f, 2.37449538e+00f, 2.44471039e+00f, 2.51782884e+00f, 2.59399766e+00f, 2.67337209e+00f, + 2.75611628e+00f, 2.84240383e+00f, 
2.93241843e+00f, 3.02635449e+00f, 3.12441791e+00f, 3.22682682e+00f, + 3.33381238e+00f, 3.44561973e+00f, 3.56250887e+00f, 3.68475574e+00f, 3.81265333e+00f, 3.94651282e+00f, + 4.08666490e+00f, 4.23346116e+00f, 4.38727553e+00f, 4.54850596e+00f, 4.71757611e+00f, 4.89493722e+00f, + 5.08107015e+00f, 5.27648761e+00f, 5.48173646e+00f, 5.69740032e+00f, 5.92410235e+00f, 6.16250823e+00f, + 6.41332946e+00f, 6.67732689e+00f, 6.95531455e+00f, 7.24816384e+00f, 7.55680807e+00f, 7.88224735e+00f, + 8.22555401e+00f, 8.58787841e+00f, 8.97045530e+00f, 9.37461076e+00f, 9.80176975e+00f, 1.02534643e+01f, + 1.07313428e+01f, 1.12371793e+01f, 1.17728848e+01f, 1.23405187e+01f, 1.29423019e+01f, 1.35806306e+01f, + 1.42580922e+01f, 1.49774818e+01f, 1.57418213e+01f, 1.65543795e+01f, 1.74186947e+01f, 1.83385994e+01f, + 1.93182476e+01f, 2.03621450e+01f, 2.14751816e+01f, 2.26626686e+01f, 2.39303784e+01f, 2.52845893e+01f, + 2.67321348e+01f, 2.82804577e+01f, 2.99376708e+01f, 3.17126238e+01f, 3.36149769e+01f, 3.56552840e+01f, + 3.78450835e+01f, 4.01970005e+01f, 4.27248599e+01f, 4.54438126e+01f, 4.83704762e+01f, 5.15230921e+01f, + 5.49217006e+01f, 5.85883374e+01f, 6.25472527e+01f, 6.68251567e+01f, 7.14514957e+01f, 7.64587609e+01f, + 8.18828353e+01f, 8.77633847e+01f, 9.41442967e+01f, 1.01074176e+02f, 1.08606902e+02f, 1.16802259e+02f, + 1.25726650e+02f, 1.35453899e+02f, 1.46066166e+02f, 1.57654979e+02f, 1.70322410e+02f, 1.84182406e+02f, + 1.99362306e+02f, 2.16004568e+02f, 2.34268740e+02f, 2.54333703e+02f, 2.76400239e+02f, 3.00693971e+02f, + 3.27468728e+02f, 3.57010397e+02f, 3.89641362e+02f, 4.25725590e+02f, 4.65674502e+02f, 5.09953726e+02f, + 5.59090900e+02f, 6.13684688e+02f, 6.74415211e+02f, 7.42056139e+02f, 8.17488717e+02f, 9.01718069e+02f, + 9.95892168e+02f, 1.10132394e+03f, 1.21951707e+03f, 1.35219615e+03f, 1.50134197e+03f, 1.66923291e+03f, + 1.85849349e+03f, 2.07215152e+03f, 2.31370536e+03f, 2.58720328e+03f, 2.89733724e+03f, 3.24955383e+03f, + 3.65018587e+03f, 4.10660860e+03f, 4.62742547e+03f, 5.22268956e+03f, 5.90416786e+03f, 6.68565726e+03f, + 7.58336313e+03f, 8.61635357e+03f, 9.80710572e+03f, 1.11821637e+04f, 1.27729327e+04f, 1.46166396e+04f, + 1.67574960e+04f, 1.92481112e+04f, 2.21512104e+04f, 2.55417295e+04f, 2.95093735e+04f, 3.41617487e+04f, + 3.96282043e+04f, 4.60645561e+04f, 5.36589049e+04f, 6.26388223e+04f, 7.32802431e+04f, 8.59184957e+04f, + 1.00962017e+05f, 1.18909442e+05f, 1.40370957e+05f, 1.66095034e+05f, 1.97001996e+05f, 2.34226253e+05f, + 2.79169596e+05f, 3.33568603e+05f, 3.99580125e+05f, 4.79889989e+05f, 5.77851588e+05f, 6.97663062e+05f, + 8.44594440e+05f, 1.02527965e+06f, 1.24809298e+06f, 1.52363581e+06f, 1.86536786e+06f, 2.29042802e+06f, + 2.82070529e+06f, 3.48424008e+06f, 4.31706343e+06f, 5.36561882e+06f, 6.68996113e+06f, 8.36799594e+06f, + 1.05011160e+07f, 1.32217203e+07f, 1.67032788e+07f, 2.11738506e+07f, 2.69343047e+07f, 3.43829654e+07f, + 4.40490690e+07f, 5.66383460e+07f, 7.30953564e+07f, 9.46890531e+07f, 1.23130681e+08f, 1.60736861e+08f, + 2.10656057e+08f, 2.77184338e+08f, 3.66207397e+08f, 4.85821891e+08f, 6.47212479e+08f, 8.65895044e+08f, + 1.16348659e+09f, 1.57023596e+09f, 2.12865840e+09f, 2.89877917e+09f, 3.96573294e+09f, 5.45082863e+09f, + 7.52773593e+09f, 1.04462776e+10f, 1.45675716e+10f, 2.04161928e+10f, 2.87579864e+10f, 4.07167363e+10f, + 5.79499965e+10f, 8.29154750e+10f, 1.19276754e+11f, 1.72524570e+11f, 2.50933409e+11f, 3.67042596e+11f, + 5.39962441e+11f, 7.98985690e+11f, 1.18927611e+12f, 1.78088199e+12f, 2.68310388e+12f, 4.06753710e+12f, + 6.20525592e+12f, 9.52719664e+12f, 1.47228407e+13f, 
2.29025392e+13f, 3.58662837e+13f, 5.65517100e+13f,
+ 8.97859411e+13f, 1.43556057e+14f, 2.31171020e+14f, 3.74966777e+14f, 6.12702071e+14f, 1.00868013e+15f,
+ 1.67323268e+15f, 2.79711270e+15f, 4.71267150e+15f, 8.00353033e+15f, 1.37027503e+16f, 2.36538022e+16f,
+ 4.11734705e+16f, 7.22793757e+16f, 1.27982244e+17f, 2.28603237e+17f, 4.11976277e+17f, 7.49169358e+17f,
+ 1.37488861e+18f, 2.54681529e+18f, 4.76248383e+18f, 8.99167123e+18f, 1.71428840e+19f, 3.30088717e+19f,
+ 6.42020070e+19f, 1.26155602e+20f, 2.50480806e+20f, 5.02601059e+20f, 1.01935525e+21f, };
+
+__constant__ float* m_weights_float[8] = {
+ m_weights_float_1,
+ m_weights_float_2,
+ m_weights_float_3,
+ m_weights_float_4,
+ m_weights_float_5,
+ m_weights_float_6,
+ m_weights_float_7,
+ m_weights_float_8
+};
+
+__constant__ double m_abscissas_double_1[13] =
+ { 7.241670621354483269e-163, 2.257639733856759198e-60, 1.153241619257215165e-22, 8.747691973876861825e-09,
+ 1.173446923800022477e-03, 1.032756936219208144e-01, 7.719261204224504866e-01, 4.355544675823585545e+00,
+ 1.215101039066652656e+02, 6.228845436711506169e+05, 6.278613977336989392e+15, 9.127414935180233465e+42,
+ 6.091127771174027909e+116, };
+
+__constant__ double m_abscissas_double_2[12] =
+ { 4.547459836328942014e-99, 6.678756542928857080e-37, 5.005042973041566360e-14, 1.341318484151208960e-05,
+ 1.833875636365939263e-02, 3.257972971286326131e-01, 1.712014688483495078e+00, 1.613222549264089627e+01,
+ 3.116246745274236447e+03, 3.751603952020919663e+09, 1.132259067258797346e+26, 6.799257464097374238e+70, };
+
+__constant__ double m_abscissas_double_3[25] =
+ { 5.314690663257815465e-127, 2.579830034615362946e-77, 3.534801062399966878e-47, 6.733941646704537777e-29,
+ 8.265803726974829043e-18, 4.424914371157762285e-11, 5.390411046738629465e-07, 1.649389713333761449e-04,
+ 5.463728936866216652e-03, 4.787896410534771955e-02, 1.931544616590306846e-01, 5.121421856617965197e-01,
+ 1.144715949265016019e+00, 2.648424684387670480e+00, 7.856804169938798917e+00, 3.944731803343517708e+01,
+ 5.060291993016831194e+02, 3.181117494063683297e+04, 2.820174654949211729e+07, 1.993745099515255184e+12,
+ 1.943469269499068563e+20, 2.858803732300638372e+33, 1.457292199029008637e+55, 8.943565831706355607e+90,
+ 9.016198369791554655e+149, };
+
+__constant__ double m_abscissas_double_4[49] =
+ { 8.165631636299519857e-144, 3.658949309353149331e-112, 1.635242513882908826e-87, 2.578381184977746454e-68,
+ 2.305546416275824199e-53, 1.016725540031465162e-41, 1.191823622917539774e-32, 1.379018088205016509e-25,
+ 4.375640088826073184e-20, 8.438464631330991606e-16, 1.838483310261119782e-12, 7.334264181393092650e-10,
+ 7.804740587931068021e-08, 2.970395577741681504e-06, 5.081805431666579484e-05, 4.671401627620431498e-04,
+ 2.652347404231090523e-03, 1.037409202661683856e-02, 3.045225582205323946e-02, 7.178280364982721201e-02,
+ 1.434001065841990688e-01, 2.535640852949085796e-01, 4.113268917643175920e-01, 6.310260805648534613e-01,
+ 9.404706503455087817e-01, 1.396267301972783068e+00, 2.116896928689963277e+00, 3.364289290471596568e+00,
+ 5.770183960005836987e+00, 1.104863531218761752e+01, 2.460224479439805859e+01, 6.699316387888639988e+01,
+ 2.375794092475844708e+02, 1.188092202760116066e+03, 9.269848635975416108e+03, 1.283900116155671304e+05,
+ 3.723397798030112514e+06, 2.793667983952389721e+08, 7.112973790863854188e+10, 8.704037695808749572e+13,
+ 8.001474015782459984e+17, 9.804091819390540578e+22, 3.342777673392873288e+29, 8.160092668471508447e+37,
+ 4.798775331663586528e+48, 3.228614320248853938e+62,
1.836986041572136151e+80, 1.153145986877483804e+103, + 2.160972586723647751e+132, }; + +__constant__ double m_abscissas_double_5[98] = + { 4.825077401709435655e-153, 3.813781211050297560e-135, 2.377824349780240844e-119, 2.065817295388293122e-105, + 4.132105770181358886e-93, 2.963965169989404311e-82, 1.127296662046635391e-72, 3.210346399945695041e-64, + 9.282992368222161062e-57, 3.565977853916619677e-50, 2.306962519220473637e-44, 3.098751038516535098e-39, + 1.039558064722960891e-34, 1.025256027381235200e-30, 3.432612000569885403e-27, 4.429681881379089961e-24, + 2.464589267395236846e-21, 6.526691446363344923e-19, 8.976892324445928684e-17, 6.926277695183452225e-15, + 3.208805316815751272e-13, 9.478053068835988899e-12, 1.882052586691155400e-10, 2.632616062773909009e-09, + 2.703411837703917665e-08, 2.113642195965330965e-07, 1.299327029813074013e-06, 6.461189935136030673e-06, + 2.665090959570723827e-05, 9.322774986189288194e-05, 2.820463407940068813e-04, 7.508613300035051413e-04, + 1.786142185986551786e-03, 3.848376610765768211e-03, 7.600810651854199771e-03, 1.390873269178271700e-02, + 2.380489559528694982e-02, 3.842796337748997654e-02, 5.895012901671883992e-02, 8.651391160689367948e-02, + 1.221961347398101671e-01, 1.670112314557845555e-01, 2.219593619059930701e-01, 2.881200442770917241e-01, + 3.667906976948184315e-01, 4.596722879563388211e-01, 5.691113093602836208e-01, 6.984190600916228379e-01, + 8.523070690462583711e-01, 1.037505121571600249e+00, 1.263672635742961915e+00, 1.544788480334120896e+00, + 1.901333876886441433e+00, 2.363816272813317635e+00, 2.978614980117902904e+00, 3.817957977526709364e+00, + 4.997477803461245639e+00, 6.708150685706236545e+00, 9.276566033183386532e+00, 1.328332469239125539e+01, + 1.980618680552458639e+01, 3.094452809319702849e+01, 5.101378787119006225e+01, 8.943523638413590523e+01, + 1.682138665185088325e+02, 3.427988270281270587e+02, 7.653823671943767281e+02, 1.895993667030670343e+03, + 5.285404568827643942e+03, 1.684878049282191210e+04, 6.254388805482299369e+04, 2.759556544455721132e+05, + 1.481213238071008345e+06, 9.929728611179601424e+06, 8.564987764771851841e+07, 9.831650826344826952e+08, + 1.560339073978569502e+10, 3.575098885016726922e+11, 1.241973798101884982e+13, 6.915106205748805839e+14, + 6.571419716645131084e+16, 1.144558033138694099e+19, 3.960915669532823553e+21, 2.984410558028297842e+24, + 5.430494850258846715e+27, 2.683747612498502676e+31, 4.114885708325522701e+35, 2.276004816861421600e+40, + 5.387544917595833246e+45, 6.623575732955432303e+51, 5.266881304835239338e+58, 3.473234812654772210e+66, + 2.517492645985977377e+75, 2.759797646289240629e+85, 6.569603829502412077e+96, 5.116181648220647995e+109, + 2.073901892339407423e+124, 7.406462446666255838e+140, }; + +__constant__ double m_abscissas_double_6[196] = + { 7.053618140948655098e-158, 2.343354218558056628e-148, 2.062509087689351439e-139, 5.212388628332260488e-131, + 4.079380320868843387e-123, 1.061481285006738214e-115, 9.816727607793017691e-109, 3.435400719609722581e-102, + 4.825198574681495574e-96, 2.874760995089533358e-90, 7.652499977338879996e-85, 9.556944498127119032e-80, + 5.862241023038227937e-75, 1.843934000129616663e-70, 3.096983980846232911e-66, 2.885057452402340330e-62, + 1.544904681826443837e-58, 4.917572705671511534e-55, 9.602608566391652866e-52, 1.184882375237471009e-48, + 9.499223316355714793e-46, 5.078965858882528461e-43, 1.856080838373584123e-40, 4.744245560917271585e-38, + 8.667497891102658240e-36, 1.155086178652063612e-33, 1.144541329818836153e-31, 8.585083084065812874e-30, + 
4.957702933032408922e-28, 2.239353794616277882e-26, 8.030405447708765492e-25, 2.318459271131684362e-23, + 5.460287296679086677e-22, 1.062054307071706375e-20, 1.725955878033239909e-19, 2.369168446274347137e-18, + 2.775176063916613602e-17, 2.800847352316621903e-16, 2.457625954357892245e-15, 1.890842052364646528e-14, + 1.285791209258834942e-13, 7.786001004707878219e-13, 4.228083024410741194e-12, 2.072664297543567489e-11, + 9.229295073519997559e-11, 3.754886152592311575e-10, 1.403443871774813834e-09, 4.843962757371872495e-09, + 1.551373196623161433e-08, 4.631448362339623514e-08, 1.294370176865168120e-07, 3.400050664017164356e-07, + 8.426290307581447654e-07, 1.977205177561996033e-06, 4.407362363338667830e-06, 9.362197325373404563e-06, + 1.900760383449277992e-05, 3.698530963711860636e-05, 6.915333419235766653e-05, 1.245492076251852927e-04, + 2.165764713808099093e-04, 3.643870211078977292e-04, 5.943999416122372516e-04, 9.418663022314558591e-04, + 1.452364274261880083e-03, 2.183094846035196562e-03, 3.203848855069215278e-03, 4.597532353031862490e-03, + 6.460168315117479792e-03, 8.900334989802041559e-03, 1.203804973137064275e-02, 1.600315622064554965e-02, + 2.093331703849583304e-02, 2.697174812170771748e-02, 3.426485378063329473e-02, 4.295992956149806344e-02, + 5.320309587203163231e-02, 6.513760993479510261e-02, 7.890268021756337834e-02, 9.463287940877026649e-02, + 1.124582226719385153e-01, 1.325049504086213973e-01, 1.548970316076579260e-01, 1.797583869192584860e-01, + 2.072158210677632145e-01, 2.374026527414815016e-01, 2.704630368855767324e-01, 3.065569893452247137e-01, + 3.458661469783558388e-01, 3.886003277325320632e-01, 4.350049951304795319e-01, 4.853697810067132707e-01, + 5.400382807495678589e-01, 5.994194092045578293e-01, 6.640006964388650918e-01, 7.343640159321037167e-01, + 8.112043806284638130e-01, 8.953526245122194172e-01, 9.878030224123093447e-01, 1.089747207002141516e+00, + 1.202616144679226559e+00, 1.328132465995424226e+00, 1.468376159872979355e+00, 1.625867601500928277e+00, + 1.803673186618691186e+00, 2.005540624723209206e+00, 2.236073393446881709e+00, 2.500957254018255004e+00, + 2.807256477663534857e+00, 3.163804128101147487e+00, 3.581720263742550029e+00, 4.075105576391566303e+00, + 4.661977749936137761e+00, 5.365546718714963091e+00, 6.215967676434536043e+00, 7.252774367330402583e+00, + 8.528291278204291331e+00, 1.011247001122720391e+01, 1.209982167952718578e+01, 1.461947158782994207e+01, + 1.784992423404041042e+01, 2.204102944968352178e+01, 2.754711235628932374e+01, 3.487766600641650640e+01, + 4.477610230214251576e+01, 5.834406132739843834e+01, 7.724096630394042216e+01, 1.040101075374387191e+02, + 1.426215523101601730e+02, 1.993940974645466479e+02, 2.845939167898235356e+02, 4.152683836292551147e+02, + 6.203878718481709769e+02, 9.504080873581791535e+02, 1.495523342124078853e+03, 2.421485328006836634e+03, + 4.041977218227396500e+03, 6.969453497454785202e+03, 1.244001690461442846e+04, 2.303794930506892099e+04, + 4.437240927040385250e+04, 8.911296561746717657e+04, 1.871159398849787994e+05, 4.119851492265743330e+05, + 9.540971729944126398e+05, 2.331680521880789706e+06, 6.034305391011695472e+06, 1.659896369452266448e+07, + 4.872448839341613053e+07, 1.532687586549090392e+08, 5.189730792935011722e+08, 1.900599621040508288e+09, + 7.566480431232731818e+09, 3.292298322781643849e+10, 1.574714421665075635e+11, 8.330244306239795892e+11, + 4.905619969814187571e+12, 3.238316002757222702e+13, 2.413995281454699076e+14, 2.048115587426077343e+15, + 1.994352670766892066e+16, 2.248750566422739144e+17, 
2.964037541992353401e+18, 4.613233119968213445e+19, + 8.569680508342001161e+20, 1.921851711942844799e+22, 5.266829246099861758e+23, 1.786779952992288976e+25, + 7.607919705736976491e+26, 4.125721424346450007e+28, 2.894340142292214313e+30, 2.670720269656428272e+32, + 3.299248229135205151e+34, 5.560105583582310103e+36, 1.304167266599523020e+39, 4.349382146382717353e+41, + 2.109720387774341509e+44, 1.524825352702403324e+47, 1.684941265105084589e+50, 2.925572737558413426e+53, + 8.217834961057481281e+56, 3.852117991896536784e+60, 3.114452310394384063e+64, 4.498555465873245751e+68, + 1.205113215232800796e+73, 6.230864727145221322e+77, 6.487131248948465269e+82, 1.422810109167834249e+88, + 6.897656089181724717e+93, 7.779163462756485195e+99, 2.155213251859555072e+106, 1.554347160152705281e+113, + 3.103875072425192272e+120, 1.832673821557018634e+128, 3.431285951865278376e+136, 2.194542081542393530e+145, }; + +__constant__ double m_abscissas_double_7[393] = + { 2.363803632659058081e-160, 1.926835442612677686e-155, 1.109114905180506786e-150, 4.556759282087534164e-146, + 1.350172241067816232e-141, 2.914359263635229435e-137, 4.627545976953585825e-133, 5.456508344460398758e-129, + 4.821828861306345485e-125, 3.221779152402086241e-121, 1.641732102111619421e-117, 6.433569189921227126e-114, + 1.954582672700428961e-110, 4.639912078942456372e-107, 8.671928891742699827e-104, 1.285485264305858782e-100, + 1.522161801460927566e-97, 1.449767844425295085e-94, 1.118122255504445235e-91, 7.028344777398825069e-89, + 3.623454064991238081e-86, 1.541513438874996543e-83, 5.443699502170284982e-81, 1.604913673768949456e-78, + 3.972206240977317536e-76, 8.297975554162539562e-74, 1.470748835855054032e-71, 2.222935801472624670e-69, + 2.879160361851977720e-67, 3.210837413250902178e-65, 3.097303984958235490e-63, 2.595974479763180595e-61, + 1.898656799199089593e-59, 1.216865518398435626e-57, 6.862041810601184397e-56, 3.418134121780773218e-54, + 1.509758535747580387e-52, 5.934924977563731784e-51, 2.083865009061241099e-49, 6.558128104492290092e-48, + 1.856133016606468181e-46, 4.739964621828176249e-45, 1.095600459825324697e-43, 2.299177139060262518e-42, + 4.393663812095906869e-41, 7.667728102142858487e-40, 1.225476279042445010e-38, 1.798526997315960782e-37, + 2.430201154741018716e-36, 3.030993518975438712e-35, 3.497966609954172613e-34, 3.744308272796551045e-33, + 3.726132797819332658e-32, 3.455018936399215381e-31, 2.991524108706319604e-30, 2.423818520801870809e-29, + 1.841452809687011486e-28, 1.314419760826235421e-27, 8.831901010260867670e-27, 5.596660060604091621e-26, + 3.350745417080507841e-25, 1.898675566025820409e-24, 1.019982287418197376e-23, 5.203315082978366918e-23, + 2.524668746906057148e-22, 1.166904646009344233e-21, 5.145437675264868732e-21, 2.167677145279166596e-20, + 8.736996911006110678e-20, 3.373776431076593266e-19, 1.249769727462160008e-18, 4.446913832647864892e-18, + 1.521741180930875343e-17, 5.014158301377399707e-17, 1.592708205361177316e-16, 4.882536933653862982e-16, + 1.446109387544416586e-15, 4.142510168443201880e-15, 1.148892083132325407e-14, 3.088024760858924214e-14, + 8.051699653634442236e-14, 2.038478329249539199e-13, 5.015686309363884049e-13, 1.200444984849900298e-12, + 2.797125428309156462e-12, 6.350357793399881333e-12, 1.405881744263466936e-11, 3.037391821635123795e-11, + 6.408863411016101449e-11, 1.321618431565916164e-10, 2.665526566207284474e-10, 5.261497418654313068e-10, + 1.017123184766088896e-09, 1.926882221639203388e-09, 3.579523428497157488e-09, 6.524486847652635035e-09, + 1.167543991262942921e-08, 
2.052356080018121741e-08, 3.545879678923676129e-08, 6.024472481556065885e-08, + 1.007076869023518125e-07, 1.657191565891799652e-07, 2.685718943404479677e-07, 4.288752213761154116e-07, + 6.751222405372943925e-07, 1.048111270324302884e-06, 1.605433960692314060e-06, 2.427271958412371013e-06, + 3.623770645356477660e-06, 5.344280132492750309e-06, 7.788767891027678939e-06, 1.122171160022519082e-05, + 1.598877254198599908e-05, 2.253652700952153115e-05, 3.143549403208496646e-05, 4.340664122305257288e-05, + 5.935147653125578529e-05, 8.038574285450253209e-05, 1.078766266062957565e-04, 1.434832731669987826e-04, + 1.892002753957224677e-04, 2.474036705329449166e-04, 3.208988510028906069e-04, 4.129696713145546995e-04, + 5.274279220384250390e-04, 6.686622480794640482e-04, 8.416855170641220285e-04, 1.052179598744440400e-03, + 1.306536501050643762e-03, 1.611894824798787196e-03, 1.976170547826080496e-03, 2.408081229927640721e-03, + 2.917162840577481875e-03, 3.513778549028205519e-03, 4.209118976964403112e-03, 5.015193592567630665e-03, + 5.944813116164644191e-03, 7.011563005746090924e-03, 8.229768289624073049e-03, 9.614450207543986041e-03, + 1.118127530523730813e-02, 1.294649779580742160e-02, 1.492689615029751590e-02, 1.713970500593860526e-02, + 1.960254358145296755e-02, 2.233334186285684056e-02, 2.535026586984720664e-02, 2.867164333232700310e-02, + 3.231589109997912964e-02, 3.630144557680610965e-02, 4.064669741956638109e-02, 4.536993166688766414e-02, + 5.048927437769432941e-02, 5.602264675683979161e-02, 6.198772763597769678e-02, 6.840192506222012774e-02, + 7.528235762939712171e-02, 8.264584606994605986e-02, 9.050891551257121825e-02, 9.888780870447738360e-02, + 1.077985103995250356e-01, 1.172567830270636607e-01, 1.272782136821146663e-01, 1.378782724173011162e-01, + 1.490723817714478840e-01, 1.608759974398061173e-01, 1.733046999768424060e-01, 1.863742974247175786e-01, + 2.001009387790379976e-01, 2.145012382381487190e-01, 2.295924102330349785e-01, 2.453924153016625057e-01, + 2.619201169541956490e-01, 2.791954497739298773e-01, 2.972395991130188526e-01, 3.160751928723792943e-01, + 3.357265060019327741e-01, 3.562196785212496373e-01, 3.775829480426418792e-01, 3.998468979800887046e-01, + 4.230447228497335035e-01, 4.472125123131631074e-01, 4.723895558858634018e-01, 4.986186705332947608e-01, + 5.259465537097384485e-01, 5.544241647649479754e-01, 5.841071380560416511e-01, 6.150562315632864018e-01, + 6.473378153258308278e-01, 6.810244045956889952e-01, 7.161952432654565143e-01, 7.529369438691556459e-01, + 7.913441913000366617e-01, 8.315205183502086596e-01, 8.735791622734589226e-01, 9.176440128265773576e-01, + 9.638506636817484398e-01, 1.012347580753402101e+00, 1.063297402882930381e+00, 1.116878392515788506e+00, + 1.173286056537125469e+00, 1.232734960362603918e+00, 1.295460761779549539e+00, 1.361722494981910846e+00, + 1.431805139837984876e+00, 1.506022516788234345e+00, 1.584720554029819354e+00, 1.668280980969603645e+00, + 1.757125510515793421e+00, 1.851720582866847453e+00, 1.952582755329533200e+00, 2.060284836698905963e+00, + 2.175462881275503983e+00, 2.298824177179966629e+00, 2.431156386859774759e+00, 2.573338025304717222e+00, + 2.726350494395667363e+00, 2.891291931102408784e+00, 3.069393174263124520e+00, 3.262036211067640944e+00, + 3.470775532153801919e+00, 3.697362905908153155e+00, 3.943776181224350319e+00, 4.212252847439515687e+00, + 4.505329225191826639e+00, 4.825886338442190807e+00, 5.177203733275742875e+00, 5.563022772612923373e+00, + 5.987621259260909859e+00, 6.455901637501497370e+00, 6.973495514195020291e+00, 
7.546887847708181032e+00, + 8.183564906772872855e+00, 8.892191039842283431e+00, 9.682820467523296204e+00, 1.056715177903931837e+01, + 1.155883465937652851e+01, 1.267384070151528947e+01, 1.393091310389918289e+01, 1.535211379418177923e+01, + 1.696349128797309510e+01, 1.879589868990482198e+01, 2.088599907466058846e+01, 2.327750557804054323e+01, + 2.602271658731131093e+01, 2.918442338619305962e+01, 3.283828974258811174e+01, 3.707583192189045823e+01, + 4.200816575721451990e+01, 4.777073782243997224e+01, 5.452932468101429049e+01, 6.248767344468634478e+01, + 7.189727649240108469e+01, 8.306993427631743111e+01, 9.639397813652482031e+01, 1.123553215857374919e+02, + 1.315649140340119335e+02, 1.547947284376312334e+02, 1.830251850988715552e+02, 2.175079854175568113e+02, + 2.598498278995140400e+02, 3.121245867818556035e+02, 3.770245173783702458e+02, 4.580653020257635092e+02, + 5.598658426219653689e+02, 6.885324967857802403e+02, 8.521902266884453403e+02, 1.061721815114114004e+03, + 1.331803836529085656e+03, 1.682368940494210217e+03, 2.140685129891926443e+03, 2.744334847491432747e+03, + 3.545516659371773357e+03, 4.617306735234797694e+03, 6.062848530677391758e+03, 8.028955134017154634e+03, + 1.072641999277462936e+04, 1.446061873485939411e+04, 1.967804579389513789e+04, 2.703776201447279367e+04, + 3.752217148194723312e+04, 5.261052412010591097e+04, 7.455350923854624329e+04, 1.068125318497402759e+05, + 1.547702528541975911e+05, 2.268930751685412563e+05, 3.366554971645478061e+05, 5.057644049026088560e+05, + 7.696291826884134742e+05, 1.186761864945790800e+06, 1.855146094294667715e+06, 2.941132644236832276e+06, + 4.731169740596920355e+06, 7.725765147199987935e+06, 1.281272565991955126e+07, 2.159151785284808339e+07, + 3.699029448836502904e+07, 6.445902263727884020e+07, 1.143158678867853615e+08, 2.064425450996979446e+08, + 3.798502995329785506e+08, 7.125329484929003007e+08, 1.363463294023391629e+09, 2.663196590686555077e+09, + 5.313347815419462975e+09, 1.083506369700027396e+10, 2.259930737910197667e+10, 4.824707991473375387e+10, + 1.055069002818752104e+11, 2.365138040635727209e+11, 5.439266129959972285e+11, 1.284356371641026839e+12, + 3.116424654245920797e+12, 7.777312465656280419e+12, 1.997984843259596733e+13, 5.288649037339853118e+13, + 1.443776937640548342e+14, 4.068967444890414804e+14, 1.185049702391501141e+15, 3.570348091883284324e+15, + 1.113971254034978026e+16, 3.603374982229766184e+16, 1.209803708182151942e+17, 4.220890251904870611e+17, + 1.532169872312865862e+18, 5.793890867821715890e+18, 2.285379920879842924e+19, 9.415714369232187727e+19, + 4.057471211245170887e+20, 1.831405465804324767e+21, 8.671209773404504008e+21, 4.313209261217173994e+22, + 2.257498454242656934e+23, 1.245267136898199709e+24, 7.251536499435180219e+24, 4.465573963364524765e+25, + 2.913233420596266283e+26, 2.017063171206072979e+27, 1.485014353353330393e+28, 1.164811091759882662e+29, + 9.753661264047912784e+29, 8.737124417851167566e+30, 8.390503265508677363e+31, 8.657362701430272680e+32, + 9.619472292454361392e+33, 1.153735498483960294e+35, 1.497284701983562213e+36, 2.107816695320163748e+37, + 3.227106623185610745e+38, 5.387696372515021985e+39, 9.835496017627849225e+40, 1.968904749086105300e+42, + 4.334704147416758275e+43, 1.052717645113369473e+45, 2.829013521120326147e+46, 8.439656297525588822e+47, + 2.804279894508234869e+49, 1.041383695988523864e+51, 4.337366591019718310e+52, 2.033523569151676725e+54, + 1.077238847489773081e+56, 6.472891251891105455e+57, 4.429404678715878536e+59, 3.466135480828349864e+61, + 3.114928656972704276e+63, 
3.228947925415990689e+65, 3.878402486902381042e+67, 5.423187597439531197e+69, + 8.870779393460412583e+71, 1.705832285076755970e+74, 3.876224350373120420e+76, 1.046359534886878004e+79, + 3.373858809560757544e+81, 1.306762499786044015e+84, 6.115300889685679832e+86, 3.478550048884517349e+89, + 2.420073578988056289e+92, 2.072453567501123129e+95, 2.199029867204449277e+98, 2.910868575802139983e+101, + 4.840699137490951163e+104, 1.018669397739170369e+108, 2.733025017438095928e+111, 9.420797277586029837e+114, + 4.205525105722885986e+118, 2.451352708852151939e+122, 1.881577053794165543e+126, 1.918506219134233785e+130, + 2.622069659115564900e+134, 4.848463485415763756e+138, 1.224645005481997780e+143, 4.267387286482591954e+147, + 2.072505613372582377e+152, }; + +__constant__ double m_abscissas_double_8[786] = + { 1.323228129684237783e-161, 4.129002973520822791e-159, 1.178655462569548882e-156, 3.082189008893206231e-154, + 7.393542832199414487e-152, 1.629100644355328639e-149, 3.301545529059822941e-147, 6.162031390854241227e-145, + 1.060528194470986309e-142, 1.685225757497235089e-140, 2.475534097582263629e-138, 3.365764749507587192e-136, + 4.240562683924022383e-134, 4.956794227885611715e-132, 5.381716367914161520e-130, 5.433507172294988849e-128, + 5.107031242794315420e-126, 4.473704932098646394e-124, 3.656376947377888629e-122, 2.791170022694259001e-120, + 1.992200238692415032e-118, 1.330894359393789718e-116, 8.330356767359890503e-115, 4.890256639970245146e-113, + 2.695128935451165447e-111, 1.395829605415630844e-109, 6.799997527188085942e-108, 3.119037767379032293e-106, + 1.348260131419216291e-104, 5.497526018943990804e-103, 2.116384670251198533e-101, 7.699148714858061209e-100, + 2.649065347250598345e-98, 8.628189263549727753e-97, 2.662520943248368922e-95, 7.790698623582886341e-94, + 2.163354866683077281e-92, 5.705576739797220361e-91, 1.430338193028564913e-89, 3.411040781372328747e-88, + 7.744268073516449037e-87, 1.675136564303435813e-85, 3.454795810595704816e-84, 6.798573137099477363e-83, + 1.277474708033782661e-81, 2.293702139426309483e-80, 3.938021700015175030e-79, 6.469593934876300124e-78, + 1.017725266990912471e-76, 1.534019529793324951e-75, 2.216999886838860916e-74, 3.074100747562803362e-73, + 4.092295330837549092e-72, 5.233434175636538471e-71, 6.433506079763357418e-70, 7.607042677901362161e-69, + 8.656714387163425357e-68, 9.486746058685489974e-67, 1.001756724248288397e-65, 1.019853943834854330e-64, + 1.001591106610665630e-63, 9.494277822444263952e-63, 8.691422918891890649e-62, 7.687977047887448276e-61, + 6.574408104196605248e-60, 5.438162502918425191e-59, 4.353340831363003212e-58, 3.374338762181243411e-57, + 2.533770921173042330e-56, 1.844048925248616738e-55, 1.301410812308480184e-54, 8.910466744374470063e-54, + 5.921538384124132331e-53, 3.821356134297705127e-52, 2.395780657353036891e-51, 1.459882187581820236e-50, + 8.650105472076777327e-50, 4.985933550797199316e-49, 2.796911903237435916e-48, 1.527570118993503332e-47, + 8.126314048196993302e-47, 4.212436363948578182e-46, 2.128604050242564662e-45, 1.048938356323431072e-44, + 5.042753142653687842e-44, 2.365999225494165364e-43, 1.083813462091040325e-42, 4.848963367960316169e-42, + 2.119612873737657277e-41, 9.055947139022002648e-41, 3.782987192192666650e-40, 1.545649846917574765e-39, + 6.178909752126026357e-39, 2.417597558625940386e-38, 9.261305999966332746e-38, 3.474712971194656115e-37, + 1.277215890629181345e-36, 4.600938133935473864e-36, 1.624804314773052044e-35, 5.626808103137929972e-35, + 1.911442429947086471e-34, 6.371300415498187125e-34, 
2.084444531309441237e-33, 6.695356060065574234e-33, + 2.112038435637792931e-32, 6.544802906551512393e-32, 1.992864937623987114e-31, 5.964358817764151755e-31, + 1.754973231464949500e-30, 5.078231558861773863e-30, 1.445447866528259475e-29, 4.048099759391660786e-29, + 1.115752878927994221e-28, 3.027334168442338592e-28, 8.087868498106224788e-28, 2.128106544151858936e-27, + 5.516210113930227985e-27, 1.408890921124863906e-26, 3.546520734326774807e-26, 8.800636481096360494e-26, + 2.153319509043984465e-25, 5.196136544731926346e-25, 1.236869058422202190e-24, 2.904891674490918873e-24, + 6.732707317563258763e-24, 1.540253603361391055e-23, 3.478765727687221019e-23, 7.758450079933031976e-23, + 1.708939324269830276e-22, 3.718467010568811152e-22, 7.994094376769029920e-22, 1.698336774318343123e-21, + 3.566214469724002275e-21, 7.402848534866351662e-21, 1.519411719755297549e-20, 3.083993994528608740e-20, + 6.191388817974459809e-20, 1.229625987010589227e-19, 2.416245949308411084e-19, 4.698551818749419706e-19, + 9.042992978848520439e-19, 1.722880198390020817e-18, 3.249832858354112322e-18, 6.070120594586457562e-18, + 1.122871881646098441e-17, 2.057429235664205922e-17, 3.734613207742816399e-17, 6.716694369267842075e-17, + 1.197063025055043952e-16, 2.114419661115663617e-16, 3.702017138231021853e-16, 6.425665498746337860e-16, + 1.105830903726985419e-15, 1.887156051660563224e-15, 3.193979018679125833e-15, 5.361881977473204459e-15, + 8.929318568606692809e-15, 1.475330560958586660e-14, 2.418708636765824964e-14, 3.935078350904051302e-14, + 6.354047096308654479e-14, 1.018416666466509442e-13, 1.620423782999307693e-13, 2.559817517056126166e-13, + 4.015273886294212810e-13, 6.254532358261761291e-13, 9.675981021394182858e-13, 1.486832112534566186e-12, + 2.269557377760486879e-12, 3.441736008766365832e-12, 5.185793859860652413e-12, 7.764217889314004663e-12, + 1.155228105746548036e-11, 1.708313121464262097e-11, 2.510951856086201897e-11, 3.668776978510952341e-11, + 5.329131813941740314e-11, 7.696325397299480856e-11, 1.105200723643722855e-10, 1.578221843796034825e-10, + 2.241309672940976766e-10, 3.165773201144956642e-10, 4.447730510871610704e-10, 6.216041661455164049e-10, + 8.642544905395987868e-10, 1.195519306516659349e-09, 1.645482121417189823e-09, 2.253643612941620883e-09, + 3.071610576496751310e-09, 4.166474690460445927e-09, 5.625036504185181035e-09, 7.559059638953998396e-09, + 1.011177417876491092e-08, 1.346588701906267454e-08, 1.785340092957703350e-08, 2.356759364235337519e-08, + 3.097756373337616088e-08, 4.054581171302714730e-08, 5.284939280085554173e-08, 6.860525247854168448e-08, + 8.870043714076795346e-08, 1.142279599340281637e-07, 1.465291959965373757e-07, 1.872437814520259903e-07, + 2.383680961705324062e-07, 3.023235208219232784e-07, 3.820357732606947876e-07, 4.810267467496160044e-07, + 6.035203917139166314e-07, 7.545643021775656875e-07, 9.401687861337141280e-07, 1.167465314019272078e-06, + 1.444886349199346242e-06, 1.782368666762205796e-06, 2.191582359683820240e-06, 2.686187812137005286e-06, + 3.282122985909738110e-06, 3.997923415034129149e-06, 4.855077333283880469e-06, 5.878418366687560187e-06, + 7.096558206229387964e-06, 8.542361632206236097e-06, 1.025346618920209381e-05, 1.227284870748632855e-05, + 1.464944073127878202e-05, 1.743879474552002742e-05, 2.070380288967650755e-05, 2.451546960924430874e-05, + 2.895373942298085844e-05, 3.410838067694928604e-05, 4.007992581615393488e-05, 4.698066833232878622e-05, + 5.493571614427227251e-05, 6.408410073746518169e-05, 7.457994093551813828e-05, 8.659365970069775654e-05, + 
1.003132518682442285e-04, 1.159456002136906496e-04, 1.337178367385581674e-04, 1.538787455425709779e-04, + 1.767002031351005554e-04, 2.024786515302844608e-04, 2.315365989746650402e-04, 2.642241426787982083e-04, + 3.009205074706080013e-04, 3.420355938637258307e-04, 3.880115286439000550e-04, 4.393242107257947798e-04, + 4.964848447258090522e-04, 5.600414544382562271e-04, 6.305803681962314437e-04, 7.087276679481586600e-04, + 7.951505937892094439e-04, 8.905588956558126794e-04, 9.957061239230124343e-04, 1.111390850739538593e-03, + 1.238457814094548688e-03, 1.377798976832850428e-03, 1.530354493121150144e-03, 1.697113575214988470e-03, + 1.879115253782404405e-03, 2.077449025503311209e-03, 2.293255382179820056e-03, 2.527726216158548279e-03, + 2.782105097477072741e-03, 3.057687418798497807e-03, 3.355820404885606963e-03, 3.677902984083964409e-03, + 4.025385520026097270e-03, 4.399769402530814407e-03, 4.802606497446985045e-03, 5.235498455973840111e-03, + 5.700095884774212336e-03, 6.198097378977308725e-03, 6.731248420937948614e-03, 7.301340148374219834e-03, + 7.910207996239952125e-03, 8.559730217397303903e-03, 9.251826287833445298e-03, 9.988455202809488913e-03, + 1.077161367093554544e-02, 1.160333421372954856e-02, 1.248568317873621646e-02, 1.342075867475355427e-02, + 1.441068843813546585e-02, 1.545762763950860648e-02, 1.656375664055830135e-02, 1.773127871080136402e-02, + 1.896241771447260382e-02, 2.025941577780677588e-02, 2.162453094709917839e-02, 2.306003484797691421e-02, + 2.456821035631025318e-02, 2.615134929114115217e-02, 2.781175013990572523e-02, 2.955171582608151263e-02, + 3.137355152920124081e-02, 3.327956256694509270e-02, 3.527205234875621605e-02, 3.735332041012234938e-02, + 3.952566053633324126e-02, 4.179135898416228534e-02, 4.415269280953487221e-02, 4.661192830883879903e-02, + 4.917131958110712872e-02, 5.183310721786459418e-02, 5.459951712697841302e-02, 5.747275949639657337e-02, + 6.045502790319455825e-02, 6.354849857288828754e-02, 6.675532979350985865e-02, 7.007766148848641979e-02, + 7.351761495191403887e-02, 7.707729274938041525e-02, 8.075877878706524317e-02, 8.456413855143733669e-02, + 8.849541952147546057e-02, 9.255465175496720496e-02, 9.674384865008904765e-02, 1.010650078831426502e-01, + 1.055201125230189472e-01, 1.101111323226840632e-01, 1.148400251877307103e-01, 1.197087388218165293e-01, + 1.247192125486176994e-01, 1.298733793097628269e-01, 1.351731678380792159e-01, 1.406205050053816316e-01, + 1.462173183439629526e-01, 1.519655387409069424e-01, 1.578671033043359383e-01, 1.639239584007306411e-01, + 1.701380628625154331e-01, 1.765113913651907042e-01, 1.830459379734134606e-01, 1.897437198555789051e-01, + 1.966067811666385690e-01, 2.036371970991047974e-01, 2.108370781024367852e-01, 2.182085742712797843e-01, + 2.257538799033364379e-01, 2.334752382279873511e-01, 2.413749463071469410e-01, 2.494553601102403241e-01, + 2.577188997656175820e-01, 2.661680549911833443e-01, 2.748053907075124803e-01, 2.836335528372471376e-01, + 2.926552742951268547e-01, 3.018733811735925662e-01, 3.112907991295277084e-01, 3.209105599783561596e-01, + 3.307358085024083972e-01, 3.407698094811951648e-01, 3.510159549519934555e-01, 3.614777717099542274e-01, + 3.721589290577866932e-01, 3.830632468159621812e-01, 3.941947036053136035e-01, 4.055574454148868711e-01, + 4.171557944689308074e-01, 4.289942584079951543e-01, 4.410775398002453309e-01, 4.534105460003012245e-01, + 4.659983993741692944e-01, 4.788464479101668631e-01, 4.919602762371392109e-01, 5.053457170727489659e-01, + 5.190088631261786795e-01, 5.329560794812372669e-01, 
5.471940164876055195e-01, 5.617296231898020413e-01, + 5.765701613254061793e-01, 5.917232199261468491e-01, 6.071967305576643327e-01, 6.229989832360855492e-01, + 6.391386430620321596e-01, 6.556247676153161584e-01, 6.724668251563812272e-01, 6.896747136835329047e-01, + 7.072587808981804764e-01, 7.252298451337033758e-01, 7.435992173071710726e-01, 7.623787239570054101e-01, + 7.815807314337971290e-01, 8.012181713158943859e-01, 8.213045671260926392e-01, 8.418540624307963733e-01, + 8.628814504084197628e-01, 8.844022049795737430e-01, 9.064325135977815717e-01, 9.289893118061069464e-01, + 9.520903196722039764e-01, 9.757540802219457353e-01, 1.000000000000000000e+00, 1.024848391894543008e+00, + 1.050320520372784475e+00, 1.076438649284173871e+00, 1.103226092399127978e+00, 1.130707266862927052e+00, + 1.158907749757141229e+00, 1.187854337974646084e+00, 1.217575111629048984e+00, 1.248099501235266386e+00, + 1.279458358915164500e+00, 1.311684033900709062e+00, 1.344810452627081143e+00, 1.378873203729832710e+00, + 1.413909628283517352e+00, 1.449958915644490754e+00, 1.487062205287898607e+00, 1.525262695058439148e+00, + 1.564605756286502811e+00, 1.605139056255971231e+00, 1.646912688547541313e+00, 1.689979311822189937e+00, + 1.734394297653598793e+00, 1.780215888066332921e+00, 1.827505363488657555e+00, 1.876327221885466881e+00, + 1.926749369898304239e+00, 1.978843326886336694e+00, 2.032684442834914613e+00, 2.088352131177556992e+00, + 2.145930117663470432e+00, 2.205506706496711366e+00, 2.267175065075584681e+00, 2.331033528772661605e+00, + 2.397185927317806037e+00, 2.465741934479827004e+00, 2.536817442887937264e+00, 2.610534965993323711e+00, + 2.687024069345184956e+00, 2.766421833546071979e+00, 2.848873351459948781e+00, 2.934532262474922666e+00, + 3.023561326873131923e+00, 3.116133043635102211e+00, 3.212430315307524598e+00, 3.312647163894682976e+00, + 3.416989502097797957e+00, 3.525675964626843197e+00, 3.638938804749809967e+00, 3.757024861729272487e+00, + 3.880196605330264341e+00, 4.008733264172298986e+00, 4.142932045347867609e+00, 4.283109453446644399e+00, + 4.429602717916437040e+00, 4.582771338567048147e+00, 4.742998759991079249e+00, 4.910694186746867507e+00, + 5.086294552335034437e+00, 5.270266656314831820e+00, 5.463109485364516396e+00, 5.665356735708146927e+00, + 5.877579556128345480e+00, 6.100389532781943879e+00, 6.334441939256981670e+00, 6.580439277782222274e+00, + 6.839135140254664526e+00, 7.111338420820842566e+00, 7.397917915172903763e+00, 7.699807345544508469e+00, + 8.018010854664294474e+00, 8.353609016702406728e+00, 8.707765418592385473e+00, 9.081733871099147484e+00, + 9.476866315716376006e+00, 9.894621501007146275e+00, 1.033657451045679019e+01, 1.080442723340841910e+01, + 1.130001988133777781e+01, 1.182534366375335115e+01, 1.238255475156052427e+01, 1.297398967101161563e+01, + 1.360218228861306245e+01, 1.426988256684760289e+01, 1.498007729260327644e+01, 1.573601300513857081e+01, + 1.654122137866316500e+01, 1.739954734664685784e+01, 1.831518029132688981e+01, 1.929268866318984532e+01, + 2.033705844217826172e+01, 2.145373590584482942e+01, 2.264867523060898736e+01, 2.392839152177298272e+01, + 2.530001994731418268e+01, 2.677138174118011529e+01, 2.835105794560498805e+01, 3.004847188085487195e+01, + 3.187398146713610639e+01, 3.383898267989664904e+01, 3.595602559959535672e+01, 3.823894472392493310e+01, + 4.070300544879345396e+01, 4.336506889917953679e+01, 4.624377760823269784e+01, 4.935976490967979071e+01, + 5.273589133292714765e+01, 5.639751178186770847e+01, 6.037277784867852275e+01, 6.469298027622754351e+01, + 
6.939293735292118365e+01, 7.451143592061966836e+01, 8.009173272176674066e+01, 8.618212503236856949e+01, + 9.283660095406551480e+01, 1.001155814082968890e+02, 1.080867678325352448e+02, 1.168261118752949279e+02, + 1.264189260858047240e+02, 1.369611577708331715e+02, 1.485608519349011866e+02, 1.613398336385932743e+02, + 1.754356453320629017e+02, 1.910037809024609590e+02, 2.082202655019913565e+02, 2.272846389233001078e+02, + 2.484234106336023257e+02, 2.718940668983047258e+02, 2.979897251188232016e+02, 3.270445480633676878e+02, + 3.594400516741229885e+02, 3.956124653087335485e+02, 4.360613334959077953e+02, 4.813595846269808355e+02, + 5.321653357808338203e+02, 5.892357556996862196e+02, 6.534433717775449045e+02, 7.257952842284018994e+02, + 8.074558443729566627e+02, 8.997734679339701200e+02, 1.004312392957944252e+03, 1.122890361185594877e+03, + 1.257623408459775530e+03, 1.410979202907522234e+03, 1.585840680166573460e+03, 1.785582106601447262e+03, + 2.014160171499825914e+03, 2.276223289283167479e+03, 2.577243010007973485e+03, 2.923672325162804598e+03, + 3.323136759290736047e+03, 3.784665511113575050e+03, 4.318971620160236406e+03, 4.938792274850918489e+03, + 5.659303058273368331e+03, 6.498623292476395004e+03, 7.478433875318933386e+03, 8.624734342286166238e+03, + 9.968772633484590145e+03, 1.154818959559393902e+04, 1.340843110702649390e+04, 1.560449453908580443e+04, + 1.820309391023133793e+04, 2.128535066649680777e+04, 2.495014598048375046e+04, 2.931830770482188047e+04, + 3.453785313845473397e+04, 4.079057084931056631e+04, 4.830030527863206410e+04, 5.734341246586992004e+04, + 6.826199159022146453e+04, 8.148067525594191464e+04, 9.752799507478730867e+04, 1.170636462204808295e+05, + 1.409133795481584143e+05, 1.701137853111825512e+05, 2.059699426710509940e+05, 2.501298539735692463e+05, + 3.046808435555379486e+05, 3.722747886360361411e+05, 4.562913164460176067e+05, 5.610511554921845541e+05, + 6.920959565810343691e+05, 8.565564972181198149e+05, 1.063638800552326000e+06, 1.325268101226286025e+06, + 1.656944841847240121e+06, 2.078886479301160156e+06, 2.617555920130068069e+06, 3.307714852226224955e+06, + 4.195192293202626259e+06, 5.340631300250745566e+06, 6.824578495767020734e+06, 8.754424053248831818e+06, + 1.127390159772263517e+07, 1.457614342739689625e+07, 1.892169326841938100e+07, 2.466345986800667442e+07, + 3.228142821711217588e+07, 4.243114571539869754e+07, 5.601173714434088431e+07, 7.426172509723072112e+07, + 9.889461357830121731e+07, 1.322915875470427182e+08, 1.777766240727455981e+08, 2.400110583389834263e+08, + 3.255621033641982742e+08, 4.437258820593761403e+08, 6.077246218504877165e+08, 8.364565879857375417e+08, + 1.157066594326456169e+09, 1.608740826498742961e+09, 2.248337657948688269e+09, 3.158785978851336228e+09, + 4.461677081363911380e+09, 6.336244831048209270e+09, 9.048130159588677560e+09, 1.299321362309972265e+10, + 1.876478261212947929e+10, 2.725703976712888971e+10, 3.982553459064288940e+10, 5.853727794017415415e+10, + 8.656299089553103385e+10, 1.287959733041898747e+11, 1.928345065430099883e+11, 2.905510467545806044e+11, + 4.406145488098485809e+11, 6.725708918778493152e+11, 1.033486938212196930e+12, 1.598840557086695854e+12, + 2.490490134218272825e+12, 3.906528466724583921e+12, 6.171225147961354244e+12, 9.819163736485109137e+12, + 1.573800106991564475e+13, 2.541245461530031221e+13, 4.134437628407981776e+13, 6.778141973485971528e+13, + 1.119906286595884492e+14, 1.865016806041768967e+14, 3.130890948724989738e+14, 5.298978847669068280e+14, + 9.042973899804181753e+14, 1.556259036818991439e+15, 
2.701230066368200812e+15, 4.729430105054711279e+15, + 8.353779033096586530e+15, 1.488827606293191651e+16, 2.677653466031614956e+16, 4.860434481369499270e+16, + 8.905735519300993312e+16, 1.647413728306871552e+17, 3.077081325673016377e+17, 5.804234101329097680e+17, + 1.105828570628099614e+18, 2.128315358808074026e+18, 4.138651532085235581e+18, 8.132554212123920035e+18, + 1.615146503312570855e+19, 3.242548467260718193e+19, 6.581494581080701321e+19, 1.350831366183090003e+20, + 2.804093832520937396e+20, 5.888113683467563837e+20, 1.250923435312468276e+21, 2.689280279098215635e+21, + 5.851582825664479700e+21, 1.288917231788944660e+22, 2.874582763768997631e+22, 6.492437335109217869e+22, + 1.485286605867082177e+23, 3.442469159113307066e+23, 8.084930196860438207e+23, 1.924506778048094878e+24, + 4.643992662491470729e+24, 1.136281452083591334e+25, 2.819664891060694571e+25, 7.097781559991856367e+25, + 1.812838850127688486e+26, 4.699012851344539124e+26, 1.236419707162832951e+27, 3.303236261210411286e+27, + 8.962558097638891218e+27, 2.470294852986226117e+28, 6.918270960555942883e+28, 1.969189447958411510e+29, + 5.698092609453981289e+29, 1.676626156396922084e+30, 5.017901520171556970e+30, 1.527929892279834489e+31, + 4.734762318366711949e+31, 1.493572546446777040e+32, 4.797441164681908184e+32, 1.569538296400998732e+33, + 5.231651156910242454e+33, 1.777206511525290941e+34, 6.154587299576916134e+34, 2.173469781356604872e+35, + 7.829529896526581616e+35, 2.877935554073076917e+36, 1.079761320923458592e+37, 4.136337730951207042e+37, + 1.618408489711185844e+38, 6.469770640447824771e+38, 2.643413654859316358e+39, 1.104246728308525703e+40, + 4.717842641881260665e+40, 2.062296462389327711e+41, 9.226680005161257219e+41, 4.226544071632731963e+42, + 1.983043729707066518e+43, 9.533448690970155039e+43, 4.697914578740208606e+44, 2.373923101980436574e+45, + 1.230570211868531753e+46, 6.546344338411695147e+46, 3.575371819335804914e+47, 2.005642453538335506e+48, + 1.156055268028903078e+49, 6.849867807870312958e+49, 4.174004815218951121e+50, 2.616872034052857472e+51, + 1.688750346837297725e+52, 1.122275666009684101e+53, 7.683968740248677071e+53, 5.422849612654278583e+54, + 3.946686701799533415e+55, 2.963543587288132884e+56, 2.297086395798939516e+57, 1.838856414208555761e+58, + 1.521049475711243996e+59, 1.300732291175071112e+60, 1.150559591141716740e+61, 1.053265997373725461e+62, + 9.984114209879020836e+62, 9.805325615938694719e+63, 9.982463564199115995e+64, 1.054102211457911410e+66, + 1.155172684780782463e+67, 1.314571302334116663e+68, 1.554362407685457310e+69, 1.910791206002645077e+70, + 2.443616403890711206e+71, 3.252983822318823232e+72, 4.510600140020139737e+73, 6.518821831001902447e+74, + 9.825834460774267633e+75, 1.545692063622722856e+77, 2.539346088408163253e+78, 4.359763993811836117e+79, + 7.827943627464404744e+80, 1.470896877674301183e+82, 2.894527071420674290e+83, 5.969662541607915492e+84, + 1.291277613981057357e+86, 2.931656535626877923e+87, 6.991353547531463135e+88, 1.752671194525972852e+90, + 4.622450137056020715e+91, 1.283581933169566226e+93, 3.755839001138390788e+94, 1.158991729845978702e+96, + 3.774916315438862678e+97, 1.298844894462381673e+99, 4.725038949943384889e+100, 1.819000031203286740e+102, + 7.416966330876906188e+103, 3.206116996910598204e+105, 1.470588770071975193e+107, 7.164198238238641057e+108, + 3.710397624567077270e+110, 2.044882454279709373e+112, 1.200428778654730225e+114, 7.513744370030172114e+115, + 5.019575746343410636e+117, 3.582726927665698318e+119, 2.734947775877248560e+121, 
2.235283764078944248e+123,
+ 1.958084751118243323e+125, 1.840431913109305657e+127, 1.858143260692831108e+129, 2.017432949655777136e+131,
+ 2.358177615888101494e+133, 2.971092974178603610e+135, 4.039532321435816302e+137, 5.933923069661132195e+139,
+ 9.429263693444953240e+141, 1.622841456932873872e+144, 3.028884476067694180e+146, 6.138356175015339477e+148,
+ 1.352531557191942648e+151, 3.244447362295582945e+153, };
+
+__constant__ double* m_abscissas_double[8] = {
+ m_abscissas_double_1,
+ m_abscissas_double_2,
+ m_abscissas_double_3,
+ m_abscissas_double_4,
+ m_abscissas_double_5,
+ m_abscissas_double_6,
+ m_abscissas_double_7,
+ m_abscissas_double_8,
+};
+
+__constant__ double m_weights_double_1[13] =
+ { 2.703640234162693583e-160, 3.100862940179668765e-58, 5.828334625665462970e-21, 1.628894422402653830e-07,
+ 8.129907377394029252e-03, 2.851214447180802931e-01, 1.228894002317118650e+00, 9.374610761705565881e+00,
+ 6.136846875218162167e+02, 8.367995944653844271e+06, 2.286032371256753845e+17, 9.029964022492184559e+44,
+ 1.637973037681055808e+119, };
+
+__constant__ double m_weights_double_2[12] =
+ { 1.029757744225565290e-96, 5.564174008086804112e-35, 1.534846576427062716e-12, 1.519539651119905182e-04,
+ 7.878691652861874032e-02, 6.288072016384128612e-01, 2.842403831496369386e+00, 5.152309209026500589e+01,
+ 2.554172947873109927e+04, 8.291547503290989754e+10, 6.794911791960761587e+27, 1.108995159102362663e+73, };
+
+__constant__ double m_weights_double_3[25] =
+ { 1.545310485347377408e-124, 4.549745016271158113e-75, 3.781189989988588481e-45, 4.369440793304363176e-27,
+ 3.253896178006708087e-16, 1.057239289288944987e-09, 7.826174663495492476e-06, 1.459783224353939263e-03,
+ 2.972970552567852420e-02, 1.637950661613330541e-01, 4.392303913269138921e-01, 8.744243777287317807e-01,
+ 1.804759465860974506e+00, 4.894937215283148383e+00, 2.036214502429748943e+01, 1.576549789679037479e+02,
+ 3.249553828744194733e+03, 3.335686029489862584e+05, 4.858218914917275532e+08, 5.655171002571584464e+13,
+ 9.084276291356790926e+21, 2.202757570781655071e+35, 1.851176020895552142e+57, 1.873046373612647920e+93,
+ 3.113183070605141140e+152, };
+
+__constant__ double m_weights_double_4[49] =
+ { 2.690380169654157101e-141, 9.388760099830475385e-110, 3.267856956418766261e-85, 4.012903562780032075e-66,
+ 2.794595941054873674e-51, 9.598140333687791635e-40, 8.762766371925782803e-31, 7.896919977115783593e-24,
+ 1.951680620313826776e-18, 2.931867534349928041e-14, 4.976350908135118762e-11, 1.546933241860617074e-08,
+ 1.283189791774752963e-06, 3.809052946018782340e-05, 5.087526585392884730e-04, 3.656819625189471368e-03,
+ 1.627679402690602992e-02, 5.011672130624018967e-02, 1.165913368715250324e-01, 2.201514148384271336e-01,
+ 3.581909054968942386e-01, 5.288599003801643436e-01, 7.422823219366348741e-01, 1.032914080772662205e+00,
+ 1.478415067523268199e+00, 2.242226697017918644e+00, 3.684755742578570582e+00, 6.677326887819023056e+00,
+ 1.358063058433697357e+01, 3.171262375809110066e+01, 8.776338468947827779e+01, 3.006939713363920293e+02,
+ 1.352196150715330628e+03, 8.616353573310419356e+03, 8.591849573350877359e+04, 1.523635814554291966e+06,
+ 5.663834603448267056e+07, 5.450828629396188577e+09, 1.780881993484818221e+12, 2.797112703281894578e+15,
+ 3.300887168363313931e+19, 5.192538272313512016e+24, 2.273085973059979872e+31, 7.124498195222272142e+39,
+ 5.379592741425673874e+50, 4.647296508337283075e+64, 3.395147156494395571e+82, 2.736576372417856435e+105,
+ 6.584825756536212781e+134, };
+
+__constant__ double
m_weights_double_5[98] = + { 1.692276285171240629e-150, 1.180420021590838281e-132, 6.494931071412232065e-117, 4.979673804239645358e-103, + 8.790122245397054202e-91, 5.564311726870413043e-80, 1.867634664877268411e-70, 4.693767384843440310e-62, + 1.197772698674604837e-54, 4.060530886983702887e-48, 2.318268710612758367e-42, 2.748088060676949794e-37, + 8.136086869664039226e-33, 7.081491999860360593e-29, 2.092407629019781417e-25, 2.383020547076997517e-22, + 1.170143938604536054e-19, 2.734857915002515580e-17, 3.319894174569245506e-15, 2.260825106530477104e-13, + 9.244747974241858562e-12, 2.410325858091057071e-10, 4.224928060220423782e-09, 5.217223349652829804e-08, + 4.730110697329046717e-07, 3.265522864288710545e-06, 1.772851678458610971e-05, 7.787346612077215804e-05, + 2.838101678971546354e-04, 8.775026198694109646e-04, 2.347474744139291716e-03, 5.529174974874315725e-03, + 1.164520226280038968e-02, 2.223487842904240574e-02, 3.896253311038730452e-02, 6.334975706136386464e-02, + 9.651712033300261848e-02, 1.390236708907266445e-01, 1.908593745910709887e-01, 2.515965688234414960e-01, + 3.206651646562737595e-01, 3.976974208167367099e-01, 4.828935799767836828e-01, 5.773826389735376677e-01, + 6.835838865575605461e-01, 8.056083579298257627e-01, 9.497742078309479997e-01, 1.125351459431134254e+00, + 1.345711576612114788e+00, 1.630156867495860456e+00, 2.006880650908830857e+00, 2.517828844916874130e+00, + 3.226826819856410846e+00, 4.233461155863004269e+00, 5.697400323487776530e+00, 7.882247346334201378e+00, + 1.123717929435969530e+01, 1.655437952523069781e+01, 2.528458931361129124e+01, 4.019700050163276117e+01, + 6.682515670231120695e+01, 1.168022589948424530e+02, 2.160045684819153702e+02, 4.257255901158116698e+02, + 9.017180693982791021e+02, 2.072151523320542727e+03, 5.222689557952776194e+03, 1.461663959276604441e+04, + 4.606455611513396576e+04, 1.660950339384278845e+05, 6.976630616605097333e+05, 3.484240083705972727e+06, + 2.117385064786894718e+07, 1.607368605379557548e+08, 1.570235957877638143e+09, 2.041619284762317483e+10, + 3.670425964529826371e+11, 9.527196643411724126e+12, 3.749667772735766186e+14, 2.365380223523087981e+16, + 2.546815287226970627e+18, 5.026010591299970789e+20, 1.970775914722195502e+23, 1.682531038342715298e+26, + 3.469062187981719410e+29, 1.942614547946028081e+33, 3.375034694941022784e+37, 2.115298406181711256e+42, + 5.673738540911562268e+47, 7.904099301170483654e+53, 7.121903115084356741e+60, 5.321820777644930491e+68, + 4.370977753639010591e+77, 5.429657931755513797e+87, 1.464602226824232950e+99, 1.292445035662836561e+112, + 5.936633203060705474e+126, 2.402419924621336913e+143, }; + +__constant__ double m_weights_double_6[196] = + { 2.552410363565288863e-155, 7.965872719315690060e-146, 6.586401422963018216e-137, 1.563673437419490296e-128, + 1.149636272392214573e-120, 2.810189759625314580e-113, 2.441446149780773329e-106, 8.026292508555041710e-100, + 1.059034284623927886e-93, 5.927259046205893861e-88, 1.482220909125121967e-82, 1.738946448501809732e-77, + 1.002047910184021813e-72, 2.960929073720769637e-68, 4.671749731809402860e-64, 4.088398674807775827e-60, + 2.056642628601930023e-56, 6.149878578966749305e-53, 1.128142221531950274e-49, 1.307702777646013040e-46, + 9.848757125541659318e-44, 4.946847667192787369e-41, 1.698284656321589089e-38, 4.077947349805764486e-36, + 6.998897321243266048e-34, 8.762183229651405846e-32, 8.156281709801700633e-30, 5.747366069381804213e-28, + 3.117951907317865517e-26, 1.323052992594482858e-24, 4.457166057119926322e-23, 1.208896132634708032e-21, + 
2.674697849739340358e-20, 4.887394807742436672e-19, 7.461632083041868391e-18, 9.622230748739818989e-17, + 1.058884510032627118e-15, 1.003988180288807180e-14, 8.276358838778374127e-14, 5.982281469656734375e-13, + 3.821855766886203088e-12, 2.174279097299082001e-11, 1.109294120074848583e-10, 5.109055596902086022e-10, + 2.137447956882816268e-09, 8.170468538364022161e-09, 2.869308592926374871e-08, 9.305185930419436742e-08, + 2.800231592227134982e-07, 7.855263634214717091e-07, 2.062924236714395731e-06, 5.092224131071637441e-06, + 1.185972357373608535e-05, 2.615333473470835518e-05, 5.479175746096322166e-05, 1.093962713107868416e-04, + 2.087714243290528595e-04, 3.818797556417767457e-04, 6.712796918790164790e-04, 1.136760145626956604e-03, + 1.858775505765622915e-03, 2.941191222579735746e-03, 4.512821350378020080e-03, 6.727293426938802892e-03, + 9.760915371480980900e-03, 1.380842853102550981e-02, 1.907678055354397196e-02, 2.577730275571060412e-02, + 3.411688991056810143e-02, 4.428892397843486143e-02, 5.646473816310556552e-02, 7.078637998740884103e-02, + 8.736131246718460273e-02, 1.062595125372295046e-01, 1.275132133780278017e-01, 1.511193209351630349e-01, + 1.770443400812491404e-01, 2.052314915777496186e-01, 2.356095985715091716e-01, 2.681032744853198083e-01, + 3.026439500331752405e-01, 3.391813282438962329e-01, 3.776949427111484449e-01, 4.182056049753837852e-01, + 4.607866519948383101e-01, 5.055750360563806155e-01, 5.527824318481410262e-01, 6.027066663808878454e-01, + 6.557439076684384801e-01, 7.124021812071310501e-01, 7.733169258916167748e-01, 8.392694625821144443e-01, + 9.112094418201526544e-01, 9.902825786957198607e-01, 1.077865293953107863e+00, 1.175608288920191064e+00, + 1.285491624542001346e+00, 1.409894601042286311e+00, 1.551684711657329886e+00, 1.714331263928885829e+00, + 1.902051053858215699e+00, 2.119995922515087770e+00, 2.374495377438728901e+00, 2.673372087884984440e+00, + 3.026354489757871517e+00, 3.445619726158519068e+00, 3.946512819227006419e+00, 4.548505964859933724e+00, + 5.276487613615791435e+00, 6.162508226184798743e+00, 7.248163842886806184e+00, 8.587878410768473380e+00, + 1.025346434903602082e+01, 1.234051869120733230e+01, 1.497748183201988157e+01, 1.833859935862139637e+01, + 2.266266859437541631e+01, 2.828045768298752298e+01, 3.565528397044830339e+01, 4.544381261232990127e+01, + 5.858833744254070379e+01, 7.645876087681923606e+01, 1.010741758687003802e+02, 1.354538987141142977e+02, + 1.841824059064608872e+02, 2.543337025162468240e+02, 3.570103970895535977e+02, 5.099537256432247190e+02, + 7.420561390174965949e+02, 1.101323941193719451e+03, 1.669232910686306616e+03, 2.587203282090385703e+03, + 4.106608602134535014e+03, 6.685657263550896700e+03, 1.118216368762133982e+04, 1.924811115485038079e+04, + 3.416174865734933127e+04, 6.263882227839496242e+04, 1.189094418952240294e+05, 2.342262528110389793e+05, + 4.798899889628646876e+05, 1.025279649144740527e+06, 2.290428015483177407e+06, 5.365618820221241118e+06, + 1.322172034826883742e+07, 3.438296542047893623e+07, 9.468905314460992170e+07, 2.771843378168242512e+08, + 8.658950437199969679e+08, 2.898779165825890846e+09, 1.044627762990198184e+10, 4.071673625087267154e+10, + 1.725245696783106160e+11, 7.989856904303845909e+11, 4.067537100664303783e+12, 2.290253922913114847e+13, + 1.435560574531699914e+14, 1.008680130601194048e+15, 8.003530334765274913e+15, 7.227937568629809266e+16, + 7.491693576707361828e+17, 8.991671234614216799e+18, 1.261556024888540618e+20, 2.090038400033346091e+21, + 4.132773073376509056e+22, 9.865671928781943336e+23, 
2.877978132616007671e+25, 1.039303004928044064e+27, + 4.710544722984128252e+28, 2.719194692980296464e+30, 2.030608169419634520e+32, 1.994536427964099457e+34, + 2.622806931876485852e+36, 4.705142628855489738e+38, 1.174794916996875010e+41, 4.170574236544843559e+43, + 2.153441953645800917e+46, 1.656794933445123415e+49, 1.948830907651317326e+52, 3.601980393005358786e+55, + 1.077033440153993124e+59, 5.374188883861674378e+62, 4.625267105826449467e+66, 7.111646979020385006e+70, + 2.027996051444846521e+75, 1.116168784120367146e+80, 1.237019821283735086e+85, 2.888108172342166477e+90, + 1.490426937972460544e+96, 1.789306677271856318e+102, 5.276973875344766848e+108, 4.051217867886536330e+115, + 8.611617868168979525e+122, 5.412634353380155695e+130, 1.078756609821147465e+139, 7.344353246966125053e+147, }; + +__constant__ double m_weights_double_7[393] = + { 8.688318611421924613e-158, 6.864317997043424201e-153, 3.829638174036322920e-148, 1.524985558970066863e-143, + 4.379527631402474835e-139, 9.162408388991747001e-135, 1.410086556664696347e-130, 1.611529786006329005e-126, + 1.380269212504431613e-122, 8.938739565456142404e-119, 4.414803004265274778e-115, 1.676831992534574674e-111, + 4.937648515671545377e-108, 1.136068312653058895e-104, 2.057969760853201132e-101, 2.956779836249922681e-98, + 3.393449014375824853e-95, 3.132619285740674842e-92, 2.341677665639346254e-89, 1.426656997926173190e-86, + 7.128825597334931865e-84, 2.939485275517928205e-81, 1.006113300119903410e-78, 2.874969402023240560e-76, + 6.896713338909433222e-74, 1.396405038640012785e-71, 2.398869799873387326e-69, 3.514180228970525006e-67, + 4.411557600438730779e-65, 4.768408435763044172e-63, 4.458287229998440383e-61, 3.621710763086768959e-59, + 2.567373174003034094e-57, 1.594829856885795944e-55, 8.716746897177859412e-54, 4.208424534880021226e-52, + 1.801637343401221381e-50, 6.864432292330768862e-49, 2.336084584516383243e-47, 7.125716658075193173e-46, + 1.954733295862350631e-44, 4.838195020814970471e-43, 1.083903033389729471e-41, 2.204655424309513426e-40, + 4.083431629921110537e-39, 6.907095608064865023e-38, 1.069951518082577963e-36, 1.521972185061747284e-35, + 1.993254198127980161e-34, 2.409552194902670884e-33, 2.695243589253751811e-32, 2.796309045342585624e-31, + 2.697138787161831243e-30, 2.423968619042656074e-29, 2.034233848004972409e-28, 1.597498662808006882e-27, + 1.176341105034547043e-26, 8.138404856556384931e-26, 5.300199402716282910e-25, 3.255367628680633536e-24, + 1.889060856810273071e-23, 1.037502167741821871e-22, 5.402129194695882094e-22, 2.671080147950250592e-21, + 1.256163163817414397e-20, 5.627458451375099018e-20, 2.405110192151924414e-19, 9.820723025892385774e-19, + 3.836610965933493002e-18, 1.435949417965440387e-17, 5.155736116435221852e-17, 1.778106820243535736e-16, + 5.897650538103448384e-16, 1.883545377386949394e-15, 5.799022727889041128e-15, 1.723080101027408120e-14, + 4.946559668895564981e-14, 1.373437058883951037e-13, 3.692057356296675476e-13, 9.618669754374864080e-13, + 2.430904641718059201e-12, 5.965319652795549281e-12, 1.422677541958913512e-11, 3.300412010407028696e-11, + 7.453993539444124847e-11, 1.640317480539372495e-10, 3.519919455549922227e-10, 7.371241496931924727e-10, + 1.507573517782825692e-09, 3.013444008176544118e-09, 5.891170930525923854e-09, 1.127175867596519203e-08, + 2.112135943063526334e-08, 3.878572405868819131e-08, 6.984140168311147329e-08, 1.233979234102365865e-07, + 2.140481233406505212e-07, 3.647293211756793211e-07, 6.108366265875129839e-07, 1.006020283089617901e-06, + 1.630199379920459998e-06, 
2.600430208375972125e-06, 4.085372746054298735e-06, 6.324194831966406940e-06, + 9.650830226718535837e-06, 1.452455211307694488e-05, 2.156782506321975658e-05, 3.161234361554654466e-05, + 4.575404320696170555e-05, 6.541767069965264068e-05, 9.243122234114186712e-05, 1.291101968446571125e-04, + 1.783511762821284409e-04, 2.437337497712608884e-04, 3.296292528289701234e-04, 4.413142327104518440e-04, + 5.850859955683163216e-04, 7.683770763700705263e-04, 9.998650298180469208e-04, 1.289573601590465490e-03, + 1.648961132392222413e-03, 2.090991995585424661e-03, 2.630186988492201910e-03, 3.282648895332118799e-03, + 4.066059914467245175e-03, 4.999648283080481820e-03, 6.104122218554241819e-03, 7.401570199659662364e-03, + 8.915327597805008451e-03, 1.066981070009509413e-02, 1.269032020049755525e-02, 1.500281723149735994e-02, + 1.763367592672867332e-02, 2.060941730962251417e-02, 2.395642996410886880e-02, 2.770068343772389725e-02, + 3.186744063963193757e-02, 3.648097561865623097e-02, 4.156430303997019336e-02, 4.713892543167989540e-02, + 5.322460385886412684e-02, 5.983915712308283792e-02, 6.699829390463281224e-02, 7.471548149065050122e-02, + 8.300185389391494996e-02, 9.186616129460712899e-02, 1.013147618591979452e-01, 1.113516561340355690e-01, + 1.219785634003157786e-01, 1.331950386328042665e-01, 1.449986280439946752e-01, 1.573850606313672716e-01, + 1.703484726870446791e-01, 1.838816618814874884e-01, 1.979763672973498048e-01, 2.126235716643688402e-01, + 2.278138220265254991e-01, 2.435375651517067386e-01, 2.597854941629632707e-01, 2.765489031191654411e-01, + 2.938200465906351752e-01, 3.115925016510994851e-01, 3.298615301301230823e-01, 3.486244394295739435e-01, + 3.678809406939879716e-01, 3.876335036292959599e-01, 4.078877077798518471e-01, 4.286525905940105684e-01, + 4.499409931290513174e-01, 4.717699047639316286e-01, 4.941608088016098926e-01, 5.171400313514193966e-01, + 5.407390963876342256e-01, 5.649950903858123945e-01, 5.899510404480374918e-01, 6.156563103475134535e-01, + 6.421670194591982411e-01, 6.695464901047961714e-01, 6.978657294374126896e-01, 7.272039526349696447e-01, + 7.576491548751669105e-01, 7.892987403432202489e-01, 8.222602173936578230e-01, 8.566519699682320391e-01, + 8.926041164852169437e-01, 9.302594686857616145e-01, 9.697746043788558519e-01, 1.011321069700320644e+00, + 1.055086728430498711e+00, 1.101277278143300224e+00, 1.150117955536247302e+00, 1.201855456275760449e+00, + 1.256760098152647779e+00, 1.315128260359919236e+00, 1.377285136373095709e+00, 1.443587843343442141e+00, + 1.514428937238563465e+00, 1.590240390338335337e+00, 1.671498096302065311e+00, 1.758726978084942299e+00, + 1.852506785760205887e+00, 1.953478685110838140e+00, 2.062352754065132708e+00, 2.179916523112736371e+00, + 2.307044718290330681e+00, 2.444710391817196957e+00, 2.593997656772008968e+00, 2.756116279277535182e+00, + 2.932418425642610903e+00, 3.124417914187536020e+00, 3.333812383735923205e+00, 3.562508865047068391e+00, + 3.812653330296280988e+00, 4.086664902155689132e+00, 4.387275531849634155e+00, 4.717576109385405085e+00, + 5.081070154695596855e+00, 5.481736462718817995e+00, 5.924102347216244340e+00, 6.413329458204850426e+00, + 6.955314549766230740e+00, 7.556808065486941215e+00, 8.225554008952760095e+00, 8.970455302965185036e+00, + 9.801769746699598466e+00, 1.073134279679936208e+01, 1.177288477943655549e+01, 1.294230185297226511e+01, + 1.425809217068106541e+01, 1.574182134943112610e+01, 1.741869467329444792e+01, 1.931824763074534781e+01, + 2.147518163232618457e+01, 2.393037838236259586e+01, 2.673213477270754163e+01, 
2.993767083537830673e+01, + 3.361497689655818107e+01, 3.784508348524495401e+01, 4.272485990900652026e+01, 4.837047622725585887e+01, + 5.492170063250241752e+01, 6.254725265973777743e+01, 7.145149574983117631e+01, 8.188283528217430591e+01, + 9.414429671899321190e+01, 1.086069017070108772e+02, 1.257266497442910506e+02, 1.460661655727672308e+02, + 1.703224100743601641e+02, 1.993623058409479084e+02, 2.342687403011957198e+02, 2.764002385528330658e+02, + 3.274687277481591846e+02, 3.896413615832930151e+02, 4.656745019682919178e+02, 5.590908996105107215e+02, + 6.744152109571297875e+02, 8.174887172033244140e+02, 9.958921680864290197e+02, 1.219517071629880108e+03, + 1.501341972869855447e+03, 1.858493492282554856e+03, 2.313705362529768409e+03, 2.897337235279879262e+03, + 3.650185874628374320e+03, 4.627425468074182920e+03, 5.904167858279871204e+03, 7.583363128219763259e+03, + 9.807105719965428472e+03, 1.277293273832114230e+04, 1.675749596877978193e+04, 2.215121038263169759e+04, + 2.950937349291504490e+04, 3.962820433513419525e+04, 5.365890489878942635e+04, 7.328024305737981431e+04, + 1.009620167752942516e+05, 1.403709568321740997e+05, 1.970019955923188504e+05, 2.791695960502382133e+05, + 3.995801250202947693e+05, 5.778515877588312220e+05, 8.445944401474017243e+05, 1.248092975135001687e+06, + 1.865367859966950385e+06, 2.820705292493674480e+06, 4.317063433830483499e+06, 6.689961127164684387e+06, + 1.050111601631327499e+07, 1.670327884792325766e+07, 2.693430470211696200e+07, 4.404906898054894166e+07, + 7.309535640536363311e+07, 1.231306812701882145e+08, 2.106560568719367745e+08, 3.662073971851359192e+08, + 6.472124787519330196e+08, 1.163486593592585616e+09, 2.128658395254150452e+09, 3.965732938755983605e+09, + 7.527735928223242836e+09, 1.456757162128879538e+10, 2.875798636941021041e+10, 5.794999654160054887e+10, + 1.192767536774485257e+11, 2.509334090779650360e+11, 5.399624414800303207e+11, 1.189276111740286910e+12, + 2.683103883355551677e+12, 6.205255919751506427e+12, 1.472284072112162717e+13, 3.586628373992547853e+13, + 8.978594107356889337e+13, 2.311710197091641250e+14, 6.127020712804348908e+14, 1.673232679378485978e+15, + 4.712671499032329365e+15, 1.370275025680988289e+16, 4.117347054027612886e+16, 1.279822436878842710e+17, + 4.119762767831332886e+17, 1.374888606936629814e+18, 4.762483833659790733e+18, 1.714288404980390540e+19, + 6.420200704842635702e+19, 2.504808062315322558e+20, 1.019355251138167687e+21, 4.332952958521756932e+21, + 1.926416464889827426e+22, 8.971059571108856501e+22, 4.382317748928748816e+23, 2.249003059943548727e+24, + 1.214458587662725100e+25, 6.911683912813140938e+25, 4.152578123301633020e+26, 2.638346388179288086e+27, + 1.775811490887700718e+28, 1.268552401544524965e+29, 9.635786341213661742e+29, 7.797939379813000783e+30, + 6.736900087983560033e+31, 6.226288752443836475e+32, 6.169035287163451891e+33, 6.567250104576983172e+34, + 7.528666735185428595e+35, 9.316271421365627344e+36, 1.247410737003664698e+38, 1.811787648043939987e+39, + 2.861918583157116420e+40, 4.929657099622567574e+41, 9.284951278562156071e+42, 1.917687997037326435e+44, + 4.355948096683946408e+45, 1.091453486585817118e+47, 3.026206402784023251e+48, 9.314478983991942688e+49, + 3.193195693823940775e+51, 1.223447678968662613e+53, 5.257403184148516426e+54, 2.543108925126136766e+56, + 1.389947584026783879e+58, 8.616987336205957549e+59, 6.083777056769299984e+61, 4.911841077800001710e+63, + 4.554259483169784661e+65, 4.870815185962582259e+67, 6.036211886847067841e+69, 8.708377755587698026e+71, + 1.469655296381977267e+74, 
2.915822924489215887e+76, 6.836044306573246016e+78, 1.903917300559946782e+81, + 6.333813341980360028e+83, 2.531082268773868753e+86, 1.222077360592898816e+89, 7.172167453276776330e+91, + 5.148160232410244898e+94, 4.548619807672339638e+97, 4.979632843475864923e+100, 6.800802744782331957e+103, + 1.166855497965918386e+107, 2.533457765534279043e+110, 7.012864641215147208e+113, 2.494083354169569414e+117, + 1.148722178881219993e+121, 6.908313932158993510e+124, 5.470912484744367184e+128, 5.755359832684120769e+132, + 8.115681923907451939e+136, 1.548304780334447081e+141, 4.034912159113614601e+145, 1.450632759611715526e+150, + 7.268799665580789770e+154, }; + +__constant__ double m_weights_double_8[786] = + { 4.901759085947701448e-159, 1.505832423620814399e-156, 4.231872109262999523e-154, 1.089479701785106001e-151, + 2.572922387150651649e-149, 5.581311054334156941e-147, 1.113575900126970040e-144, 2.046165051332286084e-142, + 3.466994885004770636e-140, 5.423795404073501922e-138, 7.843833272402847010e-136, 1.049922957933194415e-133, + 1.302301071957418603e-131, 1.498659737828393008e-129, 1.601906622414286282e-127, 1.592248618401983561e-125, + 1.473375345916436274e-123, 1.270651551394009593e-121, 1.022408263525766209e-119, 7.683762602329562781e-118, + 5.399268127233373186e-116, 3.551074274853494676e-114, 2.188235409519121010e-112, 1.264667515430816934e-110, + 6.861807566737243712e-109, 3.498691686825209963e-107, 1.678016807398375157e-105, 7.577439431441931490e-104, + 3.224703770159386809e-102, 1.294487090677705963e-100, 4.906133250963454139e-99, 1.757121317988153326e-97, + 5.952042491454320383e-96, 1.908566653286417264e-94, 5.798224459236429212e-93, 1.670293239978334727e-91, + 4.566236673398083038e-90, 1.185617342791547945e-88, 2.926160027801296929e-87, 6.870061134126707137e-86, + 1.535565783500379945e-84, 3.270036736778401257e-83, 6.639558007206580362e-82, 1.286319750967398593e-80, + 2.379566581139022958e-79, 4.206268231398883425e-78, 7.109719237833379433e-77, 1.149915104115372777e-75, + 1.780876201255594220e-74, 2.642703796179329883e-73, 3.760085375941719327e-72, 5.132920951124251993e-71, + 6.727100274601427696e-70, 8.469585621347697498e-69, 1.025032382672232848e-67, 1.193219127557863348e-66, + 1.336816930381306582e-65, 1.442283479679798385e-64, 1.499374555004793991e-63, 1.502797203133501438e-62, + 1.453005969318485303e-61, 1.355980448377862540e-60, 1.222072412212552127e-59, 1.064223180270520159e-58, + 8.959667396075636845e-58, 7.296288808079294105e-57, 5.750255296190181158e-56, 4.388011664829013518e-55, + 3.243852451291832398e-54, 2.324239357665538806e-53, 1.614869776203026446e-52, 1.088524605545274842e-51, + 7.121755574192829045e-51, 4.524647662549067074e-50, 2.792730715818793035e-49, 1.675384879603864227e-48, + 9.773114328777676091e-48, 5.545910766847627082e-47, 3.062809705627873645e-46, 1.646862118038266234e-45, + 8.625108513887155847e-45, 4.401687663868890701e-44, 2.189755778847646746e-43, 1.062345336449265889e-42, + 5.028036663485684049e-42, 2.322524635717249223e-41, 1.047406593898341306e-40, 4.613438388449698168e-40, + 1.985397445118162005e-39, 8.351027367454628343e-39, 3.434440903484543389e-38, 1.381489131877196646e-37, + 5.437051201310225224e-37, 2.094357548080647717e-36, 7.898676618592006902e-36, 2.917536870947471272e-35, + 1.055788886022716597e-34, 3.744333812160330812e-34, 1.301801185251957290e-33, 4.438346216893387768e-33, + 1.484348268951816542e-32, 4.871001129849836971e-32, 1.568903000742513942e-31, 4.961295315917935235e-31, + 1.540773910027990821e-30, 4.700558022172014910e-30, 
1.409115230718949596e-29, 4.151913103955692034e-29, + 1.202737613715427748e-28, 3.426327374934496736e-28, 9.601405359397026012e-28, 2.647278642033773301e-27, + 7.183442220565147103e-27, 1.918850545981494042e-26, 5.046974779455992494e-26, 1.307394799925911700e-25, + 3.336342198236957082e-25, 8.389259581136262194e-25, 2.079051813513548608e-24, 5.079178967243765280e-24, + 1.223501794357837278e-23, 2.906654911057549530e-23, 6.811668606095015470e-23, 1.574985938238025303e-22, + 3.593796788969348326e-22, 8.094185411205212564e-22, 1.799796183237481721e-21, 3.951758901641017285e-21, + 8.569580068050865775e-21, 1.835753486517298696e-20, 3.885414339966022317e-20, 8.126613972895021790e-20, + 1.680007182889503141e-19, 3.433369351563962828e-19, 6.937695550399427499e-19, 1.386345631008981755e-18, + 2.740087497759230881e-18, 5.357570288683386626e-18, 1.036464933022803784e-17, 1.984249442010084992e-17, + 3.759788006060003409e-17, 7.052211261821684795e-17, 1.309635641529546221e-16, 2.408275496109180528e-16, + 4.385898809611711552e-16, 7.911758686849121285e-16, 1.413883597877183873e-15, 2.503477536644680210e-15, + 4.392637866550705827e-15, 7.638710306960574612e-15, 1.316703360377476041e-14, 2.250031027275448919e-14, + 3.812239733412214953e-14, 6.405021660191363479e-14, 1.067250538270319484e-13, 1.763897493784721010e-13, + 2.891987565334547756e-13, 4.704242520369958085e-13, 7.592878273512691990e-13, 1.216183338372525172e-12, + 1.933388593436624879e-12, 3.050826852442290751e-12, 4.779080020017636657e-12, 7.432734713385425098e-12, + 1.147833888125873666e-11, 1.760286160372422754e-11, 2.681071101623953168e-11, 4.056023754295965437e-11, + 6.095443492241537222e-11, 9.100550129616064211e-11, 1.349993452136967652e-10, 1.989943912395156051e-10, + 2.914996073619059788e-10, 4.243900781412219621e-10, 6.141353162671391082e-10, 8.834365795894798511e-10, + 1.263395594025933170e-09, 1.796369250051716047e-09, 2.539704143326480862e-09, 3.570592498287890499e-09, + 4.992348403150539107e-09, 6.942471870489931483e-09, 9.602949600164561371e-09, 1.321333712761666777e-08, + 1.808727901635346390e-08, 2.463325364767791516e-08, 3.338047870136870496e-08, 4.501108426108505069e-08, + 6.039985413333259594e-08, 8.066305374526097834e-08, 1.072181059018892614e-07, 1.418561443795353991e-07, + 1.868297699836383305e-07, 2.449586539172972009e-07, 3.197559780442760832e-07, 4.155790690867544334e-07, + 5.378079713325544678e-07, 6.930561064776686194e-07, 8.894175852502122454e-07, 1.136756157868726006e-06, + 1.447041212534730898e-06, 1.834736645332833504e-06, 2.317248822354253644e-06, 2.915440225825303911e-06, + 3.654215709863551870e-06, 4.563188576773760151e-06, 5.677433909482232878e-06, 7.038336747307571784e-06, + 8.694542758083067228e-06, 1.070301902702759858e-05, 1.313023243937403750e-05, 1.605345286789073897e-05, + 1.956218797728780449e-05, 2.375975591555218862e-05, 2.876500146954361208e-05, 3.471416041263076209e-05, + 4.176287576185915239e-05, 5.008836848967403773e-05, 5.989176390181730373e-05, 7.140057340280213227e-05, + 8.487132973049760036e-05, 1.005923719620999934e-04, 1.188867746885496973e-04, 1.401154137398069279e-04, + 1.646801587388731249e-04, 1.930271805904271778e-04, 2.256503597954330556e-04, 2.630947792533707128e-04, + 3.059602829980946180e-04, 3.549050801425155303e-04, 4.106493712131842727e-04, 4.739789720708565436e-04, + 5.457489087697051069e-04, 6.268869550379884668e-04, 7.183970825975973673e-04, 8.213627933082928901e-04, + 9.369503011517966364e-04, 1.066411531385725184e-03, 1.211086903819095417e-03, 1.372407867107646339e-03, + 
1.551899151252505624e-03, 1.751180706119547318e-03, 1.971969294784470944e-03, 2.216079711850908971e-03, + 2.485425598581779636e-03, 2.782019828718993257e-03, 3.107974441230220176e-03, 3.465500098895993776e-03, + 3.856905054613959619e-03, 4.284593610523639393e-03, 4.751064058515097225e-03, 5.258906094345618421e-03, + 5.810797701414435799e-03, 6.409501504198915943e-03, 7.057860595396970186e-03, 7.758793844909123446e-03, + 8.515290702888369372e-03, 9.330405513145299523e-03, 1.020725135717912572e-02, 1.114899345297222760e-02, + 1.215884213639836574e-02, 1.324004545661629463e-02, 1.439588142011718850e-02, 1.562964992113485073e-02, + 1.694466439888404584e-02, 1.834424326453982033e-02, 1.983170114298836870e-02, 2.141033997615067889e-02, + 2.308344003609062690e-02, 2.485425089716015368e-02, 2.672598241710042669e-02, 2.870179577730820310e-02, + 3.078479463239356953e-02, 3.297801641870515720e-02, 3.528442387069167064e-02, 3.770689679281728890e-02, + 4.024822413326941635e-02, 4.291109640390936770e-02, 4.569809848884132640e-02, 4.861170288163592155e-02, + 5.165426338866744454e-02, 5.482800933323496446e-02, 5.813504029216542680e-02, 6.157732139347005467e-02, + 6.515667920037330165e-02, 6.887479820368566403e-02, 7.273321794107712090e-02, 7.673333075835566151e-02, + 8.087638022439339824e-02, 8.516346020789830747e-02, 8.959551462082867423e-02, 9.417333782991444898e-02, + 9.889757573450802477e-02, 1.037687275058577967e-01, 1.087871479799008567e-01, 1.139530506928239996e-01, + 1.192665115459606141e-01, 1.247274730840887416e-01, 1.303357493688843496e-01, 1.360910314271734020e-01, + 1.419928932517243620e-01, 1.480407983306351483e-01, 1.542341066798992024e-01, 1.605720823524863565e-01, + 1.670539013962460335e-01, 1.736786602321317742e-01, 1.804453844236544912e-01, 1.873530378080931153e-01, + 1.944005319598201097e-01, 2.015867359561292115e-01, 2.089104864161762672e-01, 2.163705977840528187e-01, + 2.239658728275971045e-01, 2.316951133252986765e-01, 2.395571309145607347e-01, 2.475507580756380088e-01, + 2.556748592267567912e-01, 2.639283419072366399e-01, 2.723101680268593668e-01, 2.808193651612593497e-01, + 2.894550378747292326e-01, 2.982163790535362503e-01, 3.071026812346166036e-01, 3.161133479163487600e-01, + 3.252479048399920142e-01, 3.345060112323053140e-01, 3.438874710018250777e-01, 3.533922438832718793e-01, + 3.630204565265675291e-01, 3.727724135289699431e-01, 3.826486084108677024e-01, 3.926497345378144818e-01, + 4.027766959934214472e-01, 4.130306184097598756e-01, 4.234128597639539906e-01, 4.339250211516634154e-01, + 4.445689575501645526e-01, 4.553467885857401860e-01, 4.662609093220769612e-01, 4.773140010883521767e-01, + 4.885090423676662636e-01, 4.998493197684479070e-01, 5.113384391034281429e-01, 5.229803366027518117e-01, + 5.347792902897740156e-01, 5.467399315500809553e-01, 5.588672569262846167e-01, 5.711666401731758417e-01, + 5.836438446098876156e-01, 5.963050358078278898e-01, 6.091567946552975691e-01, 6.222061308419237716e-01, + 6.354604968083211637e-01, 6.489278022087558681e-01, 6.626164289370386795e-01, 6.765352467684294227e-01, + 6.906936296730053994e-01, 7.051014728587479919e-01, 7.197692106055475377e-01, 7.347078349544334315e-01, + 7.499289153196209421e-01, 7.654446190944464391e-01, 7.812677333259577661e-01, 7.974116875368567865e-01, + 8.138905777776784362e-01, 8.307191919965581771e-01, 8.479130368187123741e-01, 8.654883658328603475e-01, + 8.834622094872810766e-01, 9.018524067040521621e-01, 9.206776383262963142e-01, 9.399574625199963151e-01, + 9.597123522591707284e-01, 9.799637350309700387e-01, 
1.000734034905599933e+00, 1.022046717124952010e+00, + 1.043926335373472893e+00, 1.066398581905185161e+00, 1.089490340711946628e+00, 1.113229743930062164e+00, + 1.137646231695313314e+00, 1.162770615670420260e+00, 1.188635146483979071e+00, 1.215273585336112390e+00, + 1.242721280043529050e+00, 1.271015245815510799e+00, 1.300194251072644711e+00, 1.330298908642019971e+00, + 1.361371772686240192e+00, 1.393457441749111730e+00, 1.426602668328411758e+00, 1.460856475415888358e+00, + 1.496270280476785338e+00, 1.532898027375920169e+00, 1.570796326794896619e+00, 1.610024605725646420e+00, + 1.650645266669431435e+00, 1.692723857217988332e+00, 1.736329250744977731e+00, 1.781533838991654903e+00, + 1.828413737391087381e+00, 1.877049004040720448e+00, 1.927523873304087635e+00, 1.979927005099477087e+00, + 2.034351751016940433e+00, 2.090896438495766214e+00, 2.149664674393090421e+00, 2.210765669381402212e+00, + 2.274314584729113927e+00, 2.340432903144970240e+00, 2.409248825504827076e+00, 2.480897695429288043e+00, + 2.555522453844001656e+00, 2.633274125832370887e+00, 2.714312342284411608e+00, 2.798805899057066353e+00, + 2.886933356592141886e+00, 2.978883683190077867e+00, 3.074856945413050211e+00, 3.175065049391765683e+00, + 3.279732537139255280e+00, 3.389097442334834102e+00, 3.503412210435275865e+00, 3.622944688401595705e+00, + 3.747979189802462585e+00, 3.878817641573403805e+00, 4.015780819279312670e+00, 4.159209678351536168e+00, + 4.309466789455788368e+00, 4.466937886899736897e+00, 4.632033539816493591e+00, 4.805190956770360727e+00, + 4.986875935432896972e+00, 5.177584970080537688e+00, 5.377847530880629761e+00, 5.588228530273088035e+00, + 5.809330993233640059e+00, 6.041798949837089488e+00, 6.286320570342285919e+00, 6.543631565013652661e+00, + 6.814518873098582608e+00, 7.099824667819718682e+00, 7.400450706942931008e+00, 7.717363061475788814e+00, + 8.051597258371279584e+00, 8.404263876795383951e+00, 8.776554641607500109e+00, 9.169749062247565207e+00, + 9.585221670276993889e+00, 1.002444991444300704e+01, 1.048902277839603856e+01, 1.098065019316492606e+01, + 1.150117332427169985e+01, 1.205257582204547280e+01, 1.263699613338454324e+01, 1.325674098404332380e+01, + 1.391430015262873368e+01, 1.461236267104086712e+01, 1.535383460126837531e+01, 1.614185855545811846e+01, + 1.697983514525758524e+01, 1.787144656784601339e+01, 1.882068256013178484e+01, 1.983186897964764985e+01, + 2.090969930111845450e+01, 2.205926935196095527e+01, 2.328611564861881683e+01, 2.459625773922860138e+01, + 2.599624500732998276e+01, 2.749320844694889238e+01, 2.909491798228195984e+01, 3.080984597641076715e+01, + 3.264723765414180400e+01, 3.461718925554321861e+01, 3.673073484057443067e+01, 3.899994278315456980e+01, + 4.143802312713618427e+01, 4.405944712930142330e+01, 4.688008048840357439e+01, 4.991733195758662298e+01, + 5.319031926387298369e+01, 5.672005451703465811e+01, 6.052965158594831140e+01, 6.464455825915836491e+01, + 6.909281639443131774e+01, 7.390535370725211687e+01, 7.911631135942343489e+01, 8.476341209659472308e+01, + 9.088837435982152722e+01, 9.753737857533253823e+01, 1.047615927251647361e+02, 1.126177653386554197e+02, + 1.211688952437418817e+02, 1.304849888043593828e+02, 1.406439169773708701e+02, 1.517323863863765989e+02, + 1.638470407739824279e+02, 1.770957117100033620e+02, 1.915988403612775885e+02, 2.074910955409497265e+02, + 2.249232172361061194e+02, 2.440641194630869936e+02, 2.651032917390266964e+02, 2.882535448280364212e+02, + 3.137541538897424513e+02, 3.418744609277612322e+02, 3.729180087461214321e+02, 4.072272907593818790e+02, + 
4.451892153103389878e+02, 4.872414000388630927e+02, 5.338794318098249932e+02, 5.856652513400113117e+02, + 6.432368496766822816e+02, 7.073194969336578611e+02, 7.787387632221277236e+02, 8.584356387770406827e+02, + 9.474841163944599543e+02, 1.047111666301969297e+03, 1.158723113719277435e+03, 1.283928525349707755e+03, + 1.424575826189363437e+03, 1.582789006393775706e+03, 1.761012944445459235e+03, 1.962066073573121788e+03, + 2.189202360708354222e+03, 2.446184360349559652e+03, 2.737369460761187093e+03, 3.067811870808767638e+03, + 3.443383419509962754e+03, 3.870916878218207705e+03, 4.358376293464465508e+03, 4.915059769420260559e+03, + 5.551841303216967404e+03, 6.281459704453426129e+03, 7.118864385205665710e+03, 8.081629967627799596e+03, + 9.190454321738597280e+03, 1.046975794051835702e+04, 1.194840663946247320e+04, 1.366058463062104793e+04, + 1.564685131637809273e+04, 1.795542299179967539e+04, 2.064373043744082514e+04, 2.378031563732670807e+04, + 2.744714621995650953e+04, 3.174244552480722739e+04, 3.678416050731336226e+04, 4.271422037773508051e+04, + 4.970377768100323981e+04, 5.795967273138576164e+04, 6.773242484608792593e+04, 7.932613346949942761e+04, + 9.311077397156915450e+04, 1.095375030536372224e+05, 1.291577556735669526e+05, 1.526471301608741586e+05, + 1.808353350969648289e+05, 2.147438294770164181e+05, 2.556332515573999948e+05, 3.050633345562097502e+05, + 3.649687926665853954e+05, 4.377556866857485380e+05, 5.264241222943208736e+05, 6.347248990108319410e+05, + 7.673600526542426466e+05, 9.302403050337502786e+05, 1.130816502666451845e+06, 1.378507531155523742e+06, + 1.685254393964162275e+06, 2.066239770168639390e+06, 2.540825270229354918e+06, 3.133775962036416630e+06, + 3.876865148275802393e+06, 4.810984054018349430e+06, 5.988924089534678664e+06, 7.479057929608060924e+06, + 9.370225698693408867e+06, 1.177824230977510661e+07, 1.485459301432580619e+07, 1.879809270383398104e+07, + 2.387057334436346400e+07, 3.041806552258603202e+07, 3.889950046843262151e+07, 4.992574374586696017e+07, + 6.431287504495613210e+07, 8.315518519925858136e+07, 1.079255664704117961e+08, 1.406141073390035115e+08, + 1.839201785677305607e+08, 2.415197116904975365e+08, 3.184386015381112281e+08, 4.215765018929686736e+08, + 5.604446356915114550e+08, 7.482094398046911572e+08, 1.003175129668246151e+09, 1.350898918997482870e+09, + 1.827222165053491590e+09, 2.482633480831760933e+09, 3.388577637234919719e+09, 4.646620065299105644e+09, + 6.401821801566297122e+09, 8.862352038053251473e+09, 1.232838602859196811e+10, 1.723489297480180023e+10, + 2.421530528469447376e+10, 3.419673813208063025e+10, 4.854312364622606540e+10, 6.927149043760342676e+10, + 9.938049490186203616e+10, 1.433521424759854145e+11, 2.079221734483088227e+11, 3.032695241820108158e+11, + 4.448631503727710431e+11, 6.563458646477901051e+11, 9.740635696398910980e+11, 1.454220520059656158e+12, + 2.184250688898627320e+12, 3.300999104757560757e+12, 5.019970485022749012e+12, 7.682676299017607834e+12, + 1.183376596003983872e+13, 1.834748853557035315e+13, 2.863639312458363586e+13, 4.499803892715039958e+13, + 7.119486876989154498e+13, 1.134307017980122346e+14, 1.820065782363618395e+14, 2.941484500615394037e+14, + 4.788707305890930382e+14, 7.854025036928623551e+14, 1.297894304619860251e+15, 2.161279954782425640e+15, + 3.627102147035003834e+15, 6.135342933440950378e+15, 1.046170006362244506e+16, 1.798477357839665686e+16, + 3.117473412332331475e+16, 5.449445073049184222e+16, 9.607515505017978212e+16, 1.708589224452677852e+17, + 3.065429751110228665e+17, 5.549227437451149511e+17, 
1.013730232778046314e+18, 1.869059895876405824e+18, + 3.478549552381578424e+18, 6.535992245975463763e+18, 1.240019272261066308e+19, 2.375828866910936629e+19, + 4.597682433604432625e+19, 8.988106816837128428e+19, 1.775302379393632263e+20, 3.543413304390973486e+20, + 7.148061397675525327e+20, 1.457620510577186305e+21, 3.005137124879829797e+21, 6.265024861633250697e+21, + 1.320979941090283816e+22, 2.817487535902146221e+22, 6.079933041429805231e+22, 1.327658853647212083e+23, + 2.934311759183641318e+23, 6.565087216807130026e+23, 1.487212273437937650e+24, 3.411840196076788128e+24, + 7.928189928797018762e+24, 1.866451877029704857e+25, 4.452521859886739549e+25, 1.076545435174977662e+26, + 2.638685681190697586e+26, 6.557908470244186498e+26, 1.652952243735585721e+27, 4.226383395914916199e+27, + 1.096450394268080148e+28, 2.886822082999286080e+28, 7.715480389344015925e+28, 2.093728789309964846e+29, + 5.770275789447655037e+29, 1.615463845391781140e+30, 4.595470055795608691e+30, 1.328629392686523255e+31, + 3.905079681530784219e+31, 1.167134024271997252e+32, 3.548058538654277403e+32, 1.097378059358046160e+33, + 3.454102978064445595e+33, 1.106745393701652323e+34, 3.610899559139069994e+34, 1.199946999283670567e+35, + 4.062687014190878792e+35, 1.401835223893224514e+36, 4.931085527333162173e+36, 1.768812393284919500e+37, + 6.472148293945199961e+37, 2.416453721739211922e+38, 9.208944720398123862e+38, 3.583297028622126676e+39, + 1.424097482596699440e+40, 5.782627833426411524e+40, 2.399862204084363183e+41, 1.018291572042305460e+42, + 4.419105414822034531e+42, 1.962126117680499311e+43, 8.916742424061253707e+43, 4.148882478294757720e+44, + 1.977256529558276930e+45, 9.655300233875401080e+45, 4.832878898335598922e+46, 2.480575878223098058e+47, + 1.306102809757654706e+48, 7.057565717289569232e+48, 3.915276522229618618e+49, 2.230898980943393318e+50, + 1.306141334496309306e+51, 7.861021286656392627e+51, 4.865583758538451107e+52, 3.098487425915704674e+53, + 2.031037614862563901e+54, 1.370999647608260200e+55, 9.534736274325001528e+55, 6.834959923166415407e+56, + 5.052733546324789020e+57, 3.853810997282159979e+58, 3.034183107853208298e+59, 2.467161926009838899e+60, + 2.072901039813580593e+61, 1.800563980579615383e+62, 1.617764027895344257e+63, 1.504283028250688329e+64, + 1.448393206525427172e+65, 1.444855510980115799e+66, 1.494120428855029243e+67, 1.602566566107015722e+68, + 1.783880504153942988e+69, 2.061999240572760738e+70, 2.476521794698572715e+71, 3.092349914153497358e+72, + 4.016927238305985810e+73, 5.431607545226497387e+74, 7.650086824042822759e+75, 1.123017984114349288e+77, + 1.719382952966052004e+78, 2.747335718690686674e+79, 4.584545010557684123e+80, 7.995082041539250252e+81, + 1.458119909365899044e+83, 2.783001178679600175e+84, 5.562812231966194628e+85, 1.165338768982404578e+87, + 2.560399126432838224e+88, 5.904549641859098192e+89, 1.430278474749838710e+91, 3.642046122956932563e+92, + 9.756698571206402300e+93, 2.751946044275883051e+95, 8.179164793643197279e+96, 2.563704735086825890e+98, + 8.481656496128255880e+99, 2.964260254403981007e+101, 1.095342970031208886e+103, 4.283148547584870628e+104, + 1.773954352944319744e+106, 7.788991081894224760e+107, 3.628931721056821352e+109, 1.795729272516020592e+111, + 9.446685151482835339e+112, 5.288263179614488101e+114, 3.153311236741401362e+116, 2.004807079683827669e+118, + 1.360407192665237716e+120, 9.862825609807810517e+121, 7.647551788591128099e+123, 6.348802224871730088e+125, + 5.649062361980019098e+127, 5.393248003523784781e+129, 5.530897191915703916e+131, 
6.099598644640894333e+133,
+      7.242098433491964504e+135, 9.268083053637375570e+137, 1.279942702416040582e+140, 1.909796626960621302e+142,
+      3.082540300669885040e+144, 5.388809732384179657e+146, 1.021610251056626535e+149, 2.103005440072790650e+151,
+      4.706753990348725570e+153, 1.146834128125248991e+156, };
+
+__constant__ double* m_weights_double[8] = {
+    m_weights_double_1,
+    m_weights_double_2,
+    m_weights_double_3,
+    m_weights_double_4,
+    m_weights_double_5,
+    m_weights_double_6,
+    m_weights_double_7,
+    m_weights_double_8
+};
+
+__constant__ boost::math::size_t float_coefficients_size[8] = {9, 8, 16, 33, 66, 132, 263, 527};
+
+__constant__ boost::math::size_t double_coefficients_size[8] = {13, 12, 25, 49, 98, 196, 393, 786};
+
+template <typename T>
+struct coefficients_selector;
+
+template <>
+struct coefficients_selector<float>
+{
+    __device__ static const auto abscissas() { return m_abscissas_float; }
+    __device__ static const auto weights() { return m_weights_float; }
+    __device__ static const auto size() { return float_coefficients_size; }
+};
+
+template <>
+struct coefficients_selector<double>
+{
+    __device__ static const auto abscissas() { return m_abscissas_double; }
+    __device__ static const auto weights() { return m_weights_double; }
+    __device__ static const auto size() { return double_coefficients_size; }
+};
+
+
+template <class F, class Real, class Policy = boost::math::policies::policy<> >
+__device__ auto exp_sinh_integrate_impl(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
+{
+    using K = decltype(f(static_cast<Real>(0)));
+    using boost::math::constants::half;
+    using boost::math::constants::half_pi;
+
+    // This provided a nice error message for real valued integrals, but it's super awkward for complex-valued integrals:
+    /*K y_max = f(tools::max_value<Real>());
+    if(abs(y_max) > tools::epsilon<Real>() || !(boost::math::isfinite)(y_max))
+    {
+        K val = abs(y_max);
+        return static_cast<K>(policies::raise_domain_error(function, "The function you are trying to integrate does not go to zero at infinity, and instead evaluates to %1%", val, Policy()));
+    }*/
+
+    //std::cout << std::setprecision(5*std::numeric_limits<Real>::digits10);
+
+    // Get the party started with two estimates of the integral:
+    const auto m_abscissas = coefficients_selector<Real>::abscissas();
+    const auto m_weights = coefficients_selector<Real>::weights();
+    const auto m_size = coefficients_selector<Real>::size();
+
+    Real min_abscissa{ 0 }, max_abscissa{ boost::math::tools::max_value<Real>() };
+    K I0 = 0;
+    Real L1_I0 = 0;
+    for(boost::math::size_t i = 0; i < m_size[0]; ++i)
+    {
+        K y = f(m_abscissas[0][i]);
+        K I0_last = I0;
+        I0 += y*m_weights[0][i];
+        L1_I0 += abs(y)*m_weights[0][i];
+        if ((I0_last == I0) && (abs(I0) != 0))
+        {
+            max_abscissa = m_abscissas[0][i];
+            break;
+        }
+    }
+
+    //std::cout << "First estimate : " << I0 << std::endl;
+    K I1 = I0;
+    Real L1_I1 = L1_I0;
+    bool have_first_j = false;
+    boost::math::size_t first_j = 0;
+    for (boost::math::size_t i = 0; (i < m_size[1]) && (m_abscissas[1][i] < max_abscissa); ++i)
+    {
+        K y = f(m_abscissas[1][i]);
+        K I1_last = I1;
+        I1 += y*m_weights[1][i];
+        L1_I1 += abs(y)*m_weights[1][i];
+        if (!have_first_j && (I1_last == I1))
+        {
+            // No change to the sum, disregard these values on the LHS:
+            if ((i < m_size[1] - 1) && (m_abscissas[1][i + 1] > max_abscissa))
+            {
+                // The summit is so high, that we found nothing in this row which added to the integral!!
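+                // Since no point in this row changed the sum before we ran past max_abscissa,
+                // leave min_abscissa and first_j untouched rather than trimming away the whole row: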
+                have_first_j = true;
+            }
+            else
+            {
+                min_abscissa = m_abscissas[1][i];
+                first_j = i;
+            }
+        }
+        else
+        {
+            have_first_j = true;
+        }
+    }
+
+    if (I0 == static_cast<K>(0))
+    {
+        // We failed to find anything: is the integral zero, or have we just not found it yet?
+        // We'll try one more level; if that still finds nothing then it'll terminate.
+        min_abscissa = 0;
+        max_abscissa = boost::math::tools::max_value<Real>();
+    }
+
+    I1 *= half<Real>();
+    L1_I1 *= half<Real>();
+    Real err = abs(I0 - I1);
+    //std::cout << "Second estimate: " << I1 << " Error estimate at level " << 1 << " = " << err << std::endl;
+
+    boost::math::size_t i = 2;
+    for(; i < 8U; ++i) // Magic number 8 is the number of precomputed levels
+    {
+        I0 = I1;
+        L1_I0 = L1_I1;
+
+        I1 = half<Real>()*I0;
+        L1_I1 = half<Real>()*L1_I0;
+        Real h = static_cast<Real>(1)/static_cast<Real>(1 << i);
+        K sum = 0;
+        Real absum = 0;
+
+        auto& abscissas_row = m_abscissas[i];
+        auto& weight_row = m_weights[i];
+
+        // approximate location to start looking for the lowest meaningful abscissa value
+        first_j = first_j == 0 ? 0 : 2 * first_j - 1;
+
+        boost::math::size_t j = first_j;
+        while (abscissas_row[j] < min_abscissa)
+        {
+            ++j;
+        }
+
+        for(; (j < m_size[i]) && (abscissas_row[j] < max_abscissa); ++j)
+        {
+            Real x = abscissas_row[j];
+            K y = f(x);
+            sum += y*weight_row[j];
+            Real abterm0 = abs(y)*weight_row[j];
+            absum += abterm0;
+        }
+
+        I1 += sum*h;
+        L1_I1 += absum*h;
+        err = abs(I0 - I1);
+        if (!(boost::math::isfinite)(L1_I1))
+        {
+            return static_cast<K>(policies::raise_evaluation_error("exp_sinh_integrate", "The exp_sinh quadrature evaluated your function at a singular point and returned %1%. Please ensure your function evaluates to a finite number over its entire domain.", I1, Policy()));
+        }
+        if (err <= tolerance*L1_I1)
+        {
+            break;
+        }
+    }
+
+    if (error)
+    {
+        *error = err;
+    }
+
+    if (L1)
+    {
+        *L1 = L1_I1;
+    }
+
+    if (levels)
+    {
+        *levels = i;
+    }
+
+    return I1;
+}
+
+} // namespace detail
+} // namespace quadrature
+} // namespace math
+} // namespace boost
+
+#endif // BOOST_MATH_ENABLE_CUDA
+
+#endif // BOOST_MATH_QUADRATURE_DETAIL_EXP_SINH_DETAIL_HPP
diff --git a/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp b/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp
index a9e1ef4931..7f7477a6e6 100644
--- a/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp
+++ b/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp
@@ -1,4 +1,5 @@
 // Copyright Nick Thompson, 2017
+// Copyright Matt Borland, 2024
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt @@ -7,6 +8,10 @@ #ifndef BOOST_MATH_QUADRATURE_DETAIL_SINH_SINH_DETAIL_HPP #define BOOST_MATH_QUADRATURE_DETAIL_SINH_SINH_DETAIL_HPP +#include + +#ifndef BOOST_MATH_HAS_NVRTC + #include #include #include @@ -15,7 +20,6 @@ #include #include #include -#include #ifdef BOOST_MATH_HAS_THREADS #include @@ -485,4 +489,865 @@ void sinh_sinh_detail::init(const std::integral_constant&) #endif }}}} -#endif + +#endif // BOOST_MATH_HAS_NVRTC + +#ifdef BOOST_MATH_ENABLE_CUDA + +#include +#include +#include +#include +#include +#include + +namespace boost { +namespace math { +namespace quadrature { +namespace detail { + +__constant__ float m_abscissas_float_1[4] = + { 3.08828742e+00f, 1.48993185e+02f, 3.41228925e+06f, 2.06932577e+18f, }; + +__constant__ float m_abscissas_float_2[4] = + { 9.13048763e-01f, 1.41578929e+01f, 6.70421552e+03f, 9.64172533e+10f, }; + +__constant__ float m_abscissas_float_3[8] = + { 4.07297690e-01f, 1.68206671e+00f, 6.15089799e+00f, 4.00396235e+01f, 7.92920025e+02f, 1.02984971e+05f, + 3.03862311e+08f, 1.56544547e+14f, }; + +__constant__ float m_abscissas_float_4[16] = + { 1.98135272e-01f, 6.40155674e-01f, 1.24892870e+00f, 2.26608084e+00f, 4.29646270e+00f, 9.13029039e+00f, + 2.31110765e+01f, 7.42770603e+01f, 3.26720921e+02f, 2.15948569e+03f, 2.41501526e+04f, 5.31819400e+05f, + 2.80058686e+07f, 4.52406508e+09f, 3.08561257e+12f, 1.33882673e+16f, }; + +__constant__ float m_abscissas_float_5[32] = + { 9.83967894e-02f, 3.00605618e-01f, 5.19857979e-01f, 7.70362083e-01f, 1.07131137e+00f, 1.45056976e+00f, + 1.95077855e+00f, 2.64003177e+00f, 3.63137237e+00f, 5.11991533e+00f, 7.45666098e+00f, 1.13022613e+01f, + 1.79641069e+01f, 3.01781070e+01f, 5.40387580e+01f, 1.04107731e+02f, 2.18029520e+02f, 5.02155699e+02f, + 1.28862131e+03f, 3.73921687e+03f, 1.24750730e+04f, 4.87639975e+04f, 2.28145658e+05f, 1.30877796e+06f, + 9.46084663e+06f, 8.88883120e+07f, 1.12416883e+09f, 1.99127673e+10f, 5.16743469e+11f, 2.06721881e+13f, + 1.35061503e+15f, 1.53854066e+17f, }; + +__constant__ float m_abscissas_float_6[65] = + { 4.91151004e-02f, 1.48013150e-01f, 2.48938814e-01f, 3.53325424e-01f, 4.62733557e-01f, 5.78912068e-01f, + 7.03870253e-01f, 8.39965859e-01f, 9.90015066e-01f, 1.15743257e+00f, 1.34641276e+00f, 1.56216711e+00f, + 1.81123885e+00f, 2.10192442e+00f, 2.44484389e+00f, 2.85372075e+00f, 3.34645891e+00f, 3.94664582e+00f, + 4.68567310e+00f, 5.60576223e+00f, 6.76433234e+00f, 8.24038318e+00f, 1.01439436e+01f, 1.26302471e+01f, + 1.59213040e+01f, 2.03392186e+01f, 2.63584645e+01f, 3.46892633e+01f, 4.64129147e+01f, 6.32055079e+01f, + 8.77149726e+01f, 1.24209693e+02f, 1.79718635e+02f, 2.66081728e+02f, 4.03727303e+02f, 6.28811307e+02f, + 1.00707984e+03f, 1.66156823e+03f, 2.82965144e+03f, 4.98438627e+03f, 9.10154693e+03f, 1.72689266e+04f, + 3.41309958e+04f, 7.04566898e+04f, 1.52340422e+05f, 3.46047978e+05f, 8.28472421e+05f, 2.09759615e+06f, + 5.63695080e+06f, 1.61407141e+07f, 4.94473068e+07f, 1.62781052e+08f, 5.78533297e+08f, 2.23083854e+09f, + 9.38239131e+09f, 4.32814954e+10f, 2.20307274e+11f, 1.24524507e+12f, 7.86900053e+12f, 5.59953143e+13f, + 4.52148695e+14f, 4.17688952e+15f, 4.45286776e+16f, 5.52914285e+17f, 8.07573252e+18f, }; + +__constant__ float m_abscissas_float_7[129] = + { 2.45471558e-02f, 7.37246687e-02f, 1.23152531e-01f, 1.73000138e-01f, 2.23440665e-01f, 2.74652655e-01f, + 3.26821679e-01f, 3.80142101e-01f, 4.34818964e-01f, 4.91070037e-01f, 5.49128046e-01f, 6.09243132e-01f, + 6.71685571e-01f, 7.36748805e-01f, 8.04752842e-01f, 8.76048080e-01f, 9.51019635e-01f, 
1.03009224e+00f, + 1.11373586e+00f, 1.20247203e+00f, 1.29688123e+00f, 1.39761124e+00f, 1.50538689e+00f, 1.62102121e+00f, + 1.74542840e+00f, 1.87963895e+00f, 2.02481711e+00f, 2.18228138e+00f, 2.35352849e+00f, 2.54026147e+00f, + 2.74442267e+00f, 2.96823279e+00f, 3.21423687e+00f, 3.48535896e+00f, 3.78496698e+00f, 4.11695014e+00f, + 4.48581137e+00f, 4.89677825e+00f, 5.35593629e+00f, 5.87038976e+00f, 6.44845619e+00f, 7.09990245e+00f, + 7.83623225e+00f, 8.67103729e+00f, 9.62042778e+00f, 1.07035620e+01f, 1.19433001e+01f, 1.33670142e+01f, + 1.50075962e+01f, 1.69047155e+01f, 1.91063967e+01f, 2.16710044e+01f, 2.46697527e+01f, 2.81898903e+01f, + 3.23387613e+01f, 3.72490076e+01f, 4.30852608e+01f, 5.00527965e+01f, 5.84087761e+01f, 6.84769282e+01f, + 8.06668178e+01f, 9.54992727e+01f, 1.13640120e+02f, 1.35945194e+02f, 1.63520745e+02f, 1.97804969e+02f, + 2.40678754e+02f, 2.94617029e+02f, 3.62896953e+02f, 4.49886178e+02f, 5.61444735e+02f, 7.05489247e+02f, + 8.92790773e+02f, 1.13811142e+03f, 1.46183599e+03f, 1.89233262e+03f, 2.46939604e+03f, 3.24931157e+03f, + 4.31236711e+03f, 5.77409475e+03f, 7.80224724e+03f, 1.06426753e+04f, 1.46591538e+04f, 2.03952854e+04f, + 2.86717062e+04f, 4.07403376e+04f, 5.85318231e+04f, 8.50568927e+04f, 1.25064927e+05f, 1.86137394e+05f, + 2.80525578e+05f, 4.28278249e+05f, 6.62634051e+05f, 1.03944324e+06f, 1.65385743e+06f, 2.67031565e+06f, + 4.37721203e+06f, 7.28807171e+06f, 1.23317299e+07f, 2.12155729e+07f, 3.71308625e+07f, 6.61457938e+07f, + 1.20005529e+08f, 2.21862941e+08f, 4.18228294e+08f, 8.04370413e+08f, 1.57939299e+09f, 3.16812242e+09f, + 6.49660681e+09f, 1.36285199e+10f, 2.92686390e+10f, 6.43979867e+10f, 1.45275523e+11f, 3.36285446e+11f, + 7.99420279e+11f, 1.95326423e+12f, 4.90958187e+12f, 1.27062273e+13f, 3.38907099e+13f, 9.32508403e+13f, + 2.64948942e+14f, 7.78129518e+14f, 2.36471505e+15f, 7.44413803e+15f, 2.43021724e+16f, 8.23706864e+16f, + 2.90211705e+17f, 1.06415768e+18f, 4.06627711e+18f, }; + +__constant__ float m_abscissas_float_8[259] = + { 1.22722792e-02f, 3.68272289e-02f, 6.14133763e-02f, 8.60515971e-02f, 1.10762884e-01f, 1.35568393e-01f, + 1.60489494e-01f, 1.85547813e-01f, 2.10765290e-01f, 2.36164222e-01f, 2.61767321e-01f, 2.87597761e-01f, + 3.13679240e-01f, 3.40036029e-01f, 3.66693040e-01f, 3.93675878e-01f, 4.21010910e-01f, 4.48725333e-01f, + 4.76847237e-01f, 5.05405685e-01f, 5.34430786e-01f, 5.63953775e-01f, 5.94007101e-01f, 6.24624511e-01f, + 6.55841151e-01f, 6.87693662e-01f, 7.20220285e-01f, 7.53460977e-01f, 7.87457528e-01f, 8.22253686e-01f, + 8.57895297e-01f, 8.94430441e-01f, 9.31909591e-01f, 9.70385775e-01f, 1.00991475e+00f, 1.05055518e+00f, + 1.09236885e+00f, 1.13542087e+00f, 1.17977990e+00f, 1.22551840e+00f, 1.27271289e+00f, 1.32144424e+00f, + 1.37179794e+00f, 1.42386447e+00f, 1.47773961e+00f, 1.53352485e+00f, 1.59132774e+00f, 1.65126241e+00f, + 1.71344993e+00f, 1.77801893e+00f, 1.84510605e+00f, 1.91485658e+00f, 1.98742510e+00f, 2.06297613e+00f, + 2.14168493e+00f, 2.22373826e+00f, 2.30933526e+00f, 2.39868843e+00f, 2.49202464e+00f, 2.58958621e+00f, + 2.69163219e+00f, 2.79843963e+00f, 2.91030501e+00f, 3.02754584e+00f, 3.15050230e+00f, 3.27953915e+00f, + 3.41504770e+00f, 3.55744805e+00f, 3.70719145e+00f, 3.86476298e+00f, 4.03068439e+00f, 4.20551725e+00f, + 4.38986641e+00f, 4.58438376e+00f, 4.78977239e+00f, 5.00679110e+00f, 5.23625945e+00f, 5.47906320e+00f, + 5.73616037e+00f, 6.00858792e+00f, 6.29746901e+00f, 6.60402117e+00f, 6.92956515e+00f, 7.27553483e+00f, + 7.64348809e+00f, 8.03511888e+00f, 8.45227058e+00f, 8.89695079e+00f, 9.37134780e+00f, 
9.87784877e+00f, + 1.04190601e+01f, 1.09978298e+01f, 1.16172728e+01f, 1.22807990e+01f, 1.29921443e+01f, 1.37554055e+01f, + 1.45750793e+01f, 1.54561061e+01f, 1.64039187e+01f, 1.74244972e+01f, 1.85244301e+01f, 1.97109839e+01f, + 2.09921804e+01f, 2.23768845e+01f, 2.38749023e+01f, 2.54970927e+01f, 2.72554930e+01f, 2.91634608e+01f, + 3.12358351e+01f, 3.34891185e+01f, 3.59416839e+01f, 3.86140099e+01f, 4.15289481e+01f, 4.47120276e+01f, + 4.81918020e+01f, 5.20002465e+01f, 5.61732106e+01f, 6.07509371e+01f, 6.57786566e+01f, 7.13072704e+01f, + 7.73941341e+01f, 8.41039609e+01f, 9.15098607e+01f, 9.96945411e+01f, 1.08751694e+02f, 1.18787600e+02f, + 1.29922990e+02f, 1.42295202e+02f, 1.56060691e+02f, 1.71397955e+02f, 1.88510933e+02f, 2.07632988e+02f, + 2.29031559e+02f, 2.53013612e+02f, 2.79932028e+02f, 3.10193130e+02f, 3.44265522e+02f, 3.82690530e+02f, + 4.26094527e+02f, 4.75203518e+02f, 5.30860437e+02f, 5.94045681e+02f, 6.65901543e+02f, 7.47761337e+02f, + 8.41184173e+02f, 9.47996570e+02f, 1.07034233e+03f, 1.21074246e+03f, 1.37216724e+03f, 1.55812321e+03f, + 1.77275819e+03f, 2.02098849e+03f, 2.30865326e+03f, 2.64270219e+03f, 3.03142418e+03f, 3.48472668e+03f, + 4.01447750e+03f, 4.63492426e+03f, 5.36320995e+03f, 6.22000841e+03f, 7.23030933e+03f, 8.42439022e+03f, + 9.83902287e+03f, 1.15189746e+04f, 1.35188810e+04f, 1.59055875e+04f, 1.87610857e+04f, 2.21862046e+04f, + 2.63052621e+04f, 3.12719440e+04f, 3.72767546e+04f, 4.45564828e+04f, 5.34062659e+04f, 6.41950058e+04f, + 7.73851264e+04f, 9.35579699e+04f, 1.13446538e+05f, 1.37977827e+05f, 1.68327749e+05f, 2.05992575e+05f, + 2.52882202e+05f, 3.11442272e+05f, 3.84814591e+05f, 4.77048586e+05f, 5.93380932e+05f, 7.40606619e+05f, + 9.27573047e+05f, 1.16584026e+06f, 1.47056632e+06f, 1.86169890e+06f, 2.36558487e+06f, 3.01715270e+06f, + 3.86288257e+06f, 4.96486431e+06f, 6.40636283e+06f, 8.29948185e+06f, 1.07957589e+07f, 1.41008733e+07f, + 1.84951472e+07f, 2.43622442e+07f, 3.22295113e+07f, 4.28249388e+07f, 5.71579339e+07f, 7.66343793e+07f, + 1.03221273e+08f, 1.39683399e+08f, 1.89925150e+08f, 2.59486540e+08f, 3.56266474e+08f, 4.91582541e+08f, + 6.81731647e+08f, 9.50299811e+08f, 1.33159830e+09f, 1.87580198e+09f, 2.65667391e+09f, 3.78324022e+09f, + 5.41753185e+09f, 7.80169537e+09f, 1.12996537e+10f, 1.64614916e+10f, 2.41235400e+10f, 3.55648690e+10f, + 5.27534501e+10f, 7.87357211e+10f, 1.18256902e+11f, 1.78754944e+11f, 2.71963306e+11f, 4.16512215e+11f, + 6.42178186e+11f, 9.96872550e+11f, 1.55821233e+12f, 2.45280998e+12f, 3.88865623e+12f, 6.20986899e+12f, + 9.98992422e+12f, 1.61915800e+13f, 2.64432452e+13f, 4.35201885e+13f, 7.21888469e+13f, 1.20699764e+14f, + 2.03448372e+14f, 3.45755310e+14f, 5.92524851e+14f, 1.02405779e+15f, 1.78517405e+15f, 3.13930699e+15f, + 5.56985627e+15f, 9.97176335e+15f, 1.80168749e+16f, 3.28570986e+16f, 6.04901854e+16f, 1.12437528e+17f, + 2.11044513e+17f, 4.00073701e+17f, 7.66084936e+17f, 1.48201877e+18f, 2.89694543e+18f, 5.72279017e+18f, + 1.14268996e+19f, }; + +__constant__ float* m_abscissas_float[8] = { + m_abscissas_float_1, + m_abscissas_float_2, + m_abscissas_float_3, + m_abscissas_float_4, + m_abscissas_float_5, + m_abscissas_float_6, + m_abscissas_float_7, + m_abscissas_float_8, +}; + +__constant__ float m_weights_float_1[4] = + { 7.86824160e+00f, 8.80516388e+02f, 5.39627832e+07f, 8.87651190e+19f, }; + +__constant__ float m_weights_float_2[4] = + { 2.39852428e+00f, 5.24459642e+01f, 6.45788782e+04f, 2.50998524e+12f, }; + +__constant__ float m_weights_float_3[8] = + { 1.74936958e+00f, 3.97965898e+00f, 1.84851460e+01f, 1.86488072e+02f, 
5.97420570e+03f, 1.27041264e+06f, + 6.16419301e+09f, 5.23085003e+15f, }; + +__constant__ float m_weights_float_4[16] = + { 1.61385906e+00f, 1.99776729e+00f, 3.02023198e+00f, 5.47764184e+00f, 1.17966092e+01f, 3.03550485e+01f, + 9.58442179e+01f, 3.89387024e+02f, 2.17919325e+03f, 1.83920812e+04f, 2.63212061e+05f, 7.42729651e+06f, + 5.01587565e+08f, 1.03961087e+11f, 9.10032891e+13f, 5.06865116e+17f, }; + +__constant__ float m_weights_float_5[32] = + { 1.58146596e+00f, 1.66914991e+00f, 1.85752319e+00f, 2.17566262e+00f, 2.67590138e+00f, 3.44773868e+00f, + 4.64394654e+00f, 6.53020450e+00f, 9.58228502e+00f, 1.46836141e+01f, 2.35444955e+01f, 3.96352727e+01f, + 7.03763521e+01f, 1.32588012e+02f, 2.66962565e+02f, 5.79374920e+02f, 1.36869193e+03f, 3.55943572e+03f, + 1.03218668e+04f, 3.38662130e+04f, 1.27816626e+05f, 5.65408251e+05f, 2.99446204e+06f, 1.94497502e+07f, + 1.59219301e+08f, 1.69428882e+09f, 2.42715618e+10f, 4.87031785e+11f, 1.43181966e+13f, 6.48947152e+14f, + 4.80375775e+16f, 6.20009636e+18f, }; + +__constant__ float m_weights_float_6[65] = + { 1.57345777e+00f, 1.59489276e+00f, 1.63853652e+00f, 1.70598041e+00f, 1.79972439e+00f, 1.92332285e+00f, + 2.08159737e+00f, 2.28093488e+00f, 2.52969785e+00f, 2.83878478e+00f, 3.22239575e+00f, 3.69908136e+00f, + 4.29318827e+00f, 5.03686536e+00f, 5.97287114e+00f, 7.15853842e+00f, 8.67142780e+00f, 1.06174736e+01f, + 1.31428500e+01f, 1.64514563e+01f, 2.08309945e+01f, 2.66923599e+01f, 3.46299351e+01f, 4.55151836e+01f, + 6.06440809e+01f, 8.19729692e+01f, 1.12502047e+02f, 1.56909655e+02f, 2.22620435e+02f, 3.21638549e+02f, + 4.73757451e+02f, 7.12299455e+02f, 1.09460965e+03f, 1.72169779e+03f, 2.77592491e+03f, 4.59523007e+03f, + 7.82342759e+03f, 1.37235744e+04f, 2.48518896e+04f, 4.65553875e+04f, 9.04176678e+04f, 1.82484396e+05f, + 3.83680026e+05f, 8.42627197e+05f, 1.93843257e+06f, 4.68511285e+06f, 1.19352867e+07f, 3.21564375e+07f, + 9.19600893e+07f, 2.80222318e+08f, 9.13611083e+08f, 3.20091090e+09f, 1.21076526e+10f, 4.96902475e+10f, + 2.22431575e+11f, 1.09212534e+12f, 5.91688298e+12f, 3.55974344e+13f, 2.39435365e+14f, 1.81355107e+15f, + 1.55873671e+16f, 1.53271488e+17f, 1.73927478e+18f, 2.29884122e+19f, 3.57403070e+20f, }; + +__constant__ float m_weights_float_7[129] = + { 1.57146132e+00f, 1.57679017e+00f, 1.58749564e+00f, 1.60367396e+00f, 1.62547113e+00f, 1.65308501e+00f, + 1.68676814e+00f, 1.72683132e+00f, 1.77364814e+00f, 1.82766042e+00f, 1.88938482e+00f, 1.95942057e+00f, + 2.03845873e+00f, 2.12729290e+00f, 2.22683194e+00f, 2.33811466e+00f, 2.46232715e+00f, 2.60082286e+00f, + 2.75514621e+00f, 2.92706011e+00f, 3.11857817e+00f, 3.33200254e+00f, 3.56996830e+00f, 3.83549565e+00f, + 4.13205150e+00f, 4.46362211e+00f, 4.83479919e+00f, 5.25088196e+00f, 5.71799849e+00f, 6.24325042e+00f, + 6.83488580e+00f, 7.50250620e+00f, 8.25731548e+00f, 9.11241941e+00f, 1.00831875e+01f, 1.11876913e+01f, + 1.24472371e+01f, 1.38870139e+01f, 1.55368872e+01f, 1.74323700e+01f, 1.96158189e+01f, 2.21379089e+01f, + 2.50594593e+01f, 2.84537038e+01f, 3.24091185e+01f, 3.70329629e+01f, 4.24557264e+01f, 4.88367348e+01f, + 5.63712464e+01f, 6.52994709e+01f, 7.59180776e+01f, 8.85949425e+01f, 1.03788130e+02f, 1.22070426e+02f, + 1.44161210e+02f, 1.70968019e+02f, 2.03641059e+02f, 2.43645006e+02f, 2.92854081e+02f, 3.53678602e+02f, + 4.29234308e+02f, 5.23570184e+02f, 6.41976690e+02f, 7.91405208e+02f, 9.81042209e+02f, 1.22309999e+03f, + 1.53391256e+03f, 1.93546401e+03f, 2.45753455e+03f, 3.14073373e+03f, 4.04081819e+03f, 5.23488160e+03f, + 6.83029446e+03f, 8.97771323e+03f, 1.18901592e+04f, 1.58712239e+04f, 
2.13571111e+04f, 2.89798371e+04f, + 3.96630673e+04f, 5.47687519e+04f, 7.63235654e+04f, 1.07371915e+05f, 1.52531667e+05f, 2.18877843e+05f, + 3.17362450e+05f, 4.65120153e+05f, 6.89253766e+05f, 1.03311989e+06f, 1.56688798e+06f, 2.40549203e+06f, + 3.73952896e+06f, 5.88912115e+06f, 9.39904635e+06f, 1.52090328e+07f, 2.49628719e+07f, 4.15775926e+07f, + 7.03070537e+07f, 1.20759856e+08f, 2.10788251e+08f, 3.74104720e+08f, 6.75449459e+08f, 1.24131674e+09f, + 2.32331003e+09f, 4.43117602e+09f, 8.61744649e+09f, 1.70983691e+10f, 3.46357452e+10f, 7.16760712e+10f, + 1.51634762e+11f, 3.28172932e+11f, 7.27110260e+11f, 1.65049955e+12f, 3.84133815e+12f, 9.17374427e+12f, + 2.24990195e+13f, 5.67153509e+13f, 1.47074225e+14f, 3.92701252e+14f, 1.08063998e+15f, 3.06767147e+15f, + 8.99238679e+15f, 2.72472254e+16f, 8.54294612e+16f, 2.77461372e+17f, 9.34529948e+17f, 3.26799612e+18f, + 1.18791443e+19f, 4.49405341e+19f, 1.77170665e+20f, }; + +__constant__ float m_weights_float_8[259] = + { 1.57096255e+00f, 1.57229290e+00f, 1.57495658e+00f, 1.57895955e+00f, 1.58431079e+00f, 1.59102230e+00f, + 1.59910918e+00f, 1.60858966e+00f, 1.61948515e+00f, 1.63182037e+00f, 1.64562338e+00f, 1.66092569e+00f, + 1.67776241e+00f, 1.69617233e+00f, 1.71619809e+00f, 1.73788633e+00f, 1.76128784e+00f, 1.78645779e+00f, + 1.81345587e+00f, 1.84234658e+00f, 1.87319943e+00f, 1.90608922e+00f, 1.94109632e+00f, 1.97830698e+00f, + 2.01781368e+00f, 2.05971547e+00f, 2.10411838e+00f, 2.15113585e+00f, 2.20088916e+00f, 2.25350798e+00f, + 2.30913084e+00f, 2.36790578e+00f, 2.42999091e+00f, 2.49555516e+00f, 2.56477893e+00f, 2.63785496e+00f, + 2.71498915e+00f, 2.79640147e+00f, 2.88232702e+00f, 2.97301705e+00f, 3.06874019e+00f, 3.16978367e+00f, + 3.27645477e+00f, 3.38908227e+00f, 3.50801806e+00f, 3.63363896e+00f, 3.76634859e+00f, 3.90657947e+00f, + 4.05479525e+00f, 4.21149322e+00f, 4.37720695e+00f, 4.55250922e+00f, 4.73801517e+00f, 4.93438579e+00f, + 5.14233166e+00f, 5.36261713e+00f, 5.59606472e+00f, 5.84356014e+00f, 6.10605759e+00f, 6.38458564e+00f, + 6.68025373e+00f, 6.99425915e+00f, 7.32789480e+00f, 7.68255767e+00f, 8.05975815e+00f, 8.46113023e+00f, + 8.88844279e+00f, 9.34361190e+00f, 9.82871448e+00f, 1.03460033e+01f, 1.08979234e+01f, 1.14871305e+01f, + 1.21165112e+01f, 1.27892047e+01f, 1.35086281e+01f, 1.42785033e+01f, 1.51028871e+01f, 1.59862046e+01f, + 1.69332867e+01f, 1.79494108e+01f, 1.90403465e+01f, 2.02124072e+01f, 2.14725057e+01f, 2.28282181e+01f, + 2.42878539e+01f, 2.58605342e+01f, 2.75562800e+01f, 2.93861096e+01f, 3.13621485e+01f, 3.34977526e+01f, + 3.58076454e+01f, 3.83080730e+01f, 4.10169773e+01f, 4.39541917e+01f, 4.71416602e+01f, 5.06036855e+01f, + 5.43672075e+01f, 5.84621188e+01f, 6.29216205e+01f, 6.77826252e+01f, 7.30862125e+01f, 7.88781469e+01f, + 8.52094636e+01f, 9.21371360e+01f, 9.97248336e+01f, 1.08043785e+02f, 1.17173764e+02f, 1.27204209e+02f, + 1.38235512e+02f, 1.50380485e+02f, 1.63766039e+02f, 1.78535118e+02f, 1.94848913e+02f, 2.12889407e+02f, + 2.32862309e+02f, 2.55000432e+02f, 2.79567594e+02f, 3.06863126e+02f, 3.37227087e+02f, 3.71046310e+02f, + 4.08761417e+02f, 4.50874968e+02f, 4.97960949e+02f, 5.50675821e+02f, 6.09771424e+02f, 6.76110054e+02f, + 7.50682104e+02f, 8.34626760e+02f, 9.29256285e+02f, 1.03608458e+03f, 1.15686082e+03f, 1.29360914e+03f, + 1.44867552e+03f, 1.62478326e+03f, 1.82509876e+03f, 2.05330964e+03f, 2.31371761e+03f, 2.61134924e+03f, + 2.95208799e+03f, 3.34283233e+03f, 3.79168493e+03f, 4.30817984e+03f, 4.90355562e+03f, 5.59108434e+03f, + 6.38646863e+03f, 7.30832183e+03f, 8.37874981e+03f, 9.62405722e+03f, 1.10756067e+04f, 
1.27708661e+04f, + 1.47546879e+04f, 1.70808754e+04f, 1.98141031e+04f, 2.30322789e+04f, 2.68294532e+04f, 3.13194118e+04f, + 3.66401221e+04f, 4.29592484e+04f, 5.04810088e+04f, 5.94547213e+04f, 7.01854788e+04f, 8.30475173e+04f, + 9.85009981e+04f, 1.17113127e+05f, 1.39584798e+05f, 1.66784302e+05f, 1.99790063e+05f, 2.39944995e+05f, + 2.88925794e+05f, 3.48831531e+05f, 4.22297220e+05f, 5.12639825e+05f, 6.24046488e+05f, 7.61817907e+05f, + 9.32683930e+05f, 1.14521401e+06f, 1.41035265e+06f, 1.74212004e+06f, 2.15853172e+06f, 2.68280941e+06f, + 3.34498056e+06f, 4.18399797e+06f, 5.25055801e+06f, 6.61086017e+06f, 8.35163942e+06f, 1.05869253e+07f, + 1.34671524e+07f, 1.71914827e+07f, 2.20245345e+07f, 2.83191730e+07f, 3.65476782e+07f, 4.73445266e+07f, + 6.15653406e+07f, 8.03684303e+07f, 1.05328028e+08f, 1.38592169e+08f, 1.83103699e+08f, 2.42910946e+08f, + 3.23606239e+08f, 4.32947522e+08f, 5.81743297e+08f, 7.85117979e+08f, 1.06432920e+09f, 1.44938958e+09f, + 1.98286647e+09f, 2.72541431e+09f, 3.76386796e+09f, 5.22313881e+09f, 7.28378581e+09f, 1.02080964e+10f, + 1.43789932e+10f, 2.03583681e+10f, 2.89749983e+10f, 4.14577375e+10f, 5.96383768e+10f, 8.62622848e+10f, + 1.25466705e+11f, 1.83521298e+11f, 2.69981221e+11f, 3.99492845e+11f, 5.94638056e+11f, 8.90440997e+11f, + 1.34155194e+12f, 2.03376855e+12f, 3.10262796e+12f, 4.76359832e+12f, 7.36142036e+12f, 1.14512696e+13f, + 1.79331419e+13f, 2.82758550e+13f, 4.48929705e+13f, 7.17780287e+13f, 1.15585510e+14f, 1.87483389e+14f, + 3.06351036e+14f, 5.04340065e+14f, 8.36616340e+14f, 1.39855635e+15f, 2.35633575e+15f, 4.00176517e+15f, + 6.85137513e+15f, 1.18269011e+16f, 2.05867353e+16f, 3.61396878e+16f, 6.39911218e+16f, 1.14301619e+17f, + 2.05988138e+17f, 3.74584679e+17f, 6.87444303e+17f, 1.27340764e+18f, 2.38124192e+18f, 4.49583562e+18f, + 8.57144202e+18f, 1.65044358e+19f, 3.21010035e+19f, 6.30778012e+19f, 1.25240403e+20f, 2.51300530e+20f, + 5.09677626e+20f, }; + +__constant__ float* m_weights_float[8] = { + m_weights_float_1, + m_weights_float_2, + m_weights_float_3, + m_weights_float_4, + m_weights_float_5, + m_weights_float_6, + m_weights_float_7, + m_weights_float_8 +}; + +__constant__ double m_abscissas_double_1[6] = + { 3.088287417976322866e+00, 1.489931846492091580e+02, 3.412289247883437102e+06, 2.069325766042617791e+18, + 2.087002407609475560e+50, 2.019766160717908151e+137, }; + +__constant__ double m_abscissas_double_2[6] = + { 9.130487626376696748e-01, 1.415789294662811592e+01, 6.704215516223276482e+03, 9.641725327150499415e+10, + 2.508950760085778485e+30, 1.447263535710337145e+83, }; + +__constant__ double m_abscissas_double_3[12] = + { 4.072976900657586902e-01, 1.682066707021148743e+00, 6.150897986386729515e+00, 4.003962351929400222e+01, + 7.929200247931026321e+02, 1.029849713330979583e+05, 3.038623109252438574e+08, 1.565445474362494869e+14, + 4.042465098430219104e+23, 1.321706827429658179e+39, 4.991231782099557998e+64, 7.352943850359875966e+106, }; + +__constant__ double m_abscissas_double_4[24] = + { 1.981352722514781726e-01, 6.401556735005260177e-01, 1.248928698253977663e+00, 2.266080840944321232e+00, + 4.296462696702327381e+00, 9.130290387099955696e+00, 2.311107653864279933e+01, 7.427706034324012430e+01, + 3.267209207115258917e+02, 2.159485694311818716e+03, 2.415015262896413060e+04, 5.318194002756929158e+05, + 2.800586857217043323e+07, 4.524065079794338780e+09, 3.085612573980677122e+12, 1.338826733015807478e+16, + 6.254617176562341381e+20, 6.182098535814164754e+26, 3.077293649788458067e+34, 2.348957289370104303e+44, + 1.148543197899469758e+57, 
2.255300070010069868e+73, 1.877919500569195394e+94, 1.367473887938624280e+121, }; + +__constant__ double m_abscissas_double_5[49] = + { 9.839678940067320339e-02, 3.006056176599550351e-01, 5.198579789949384900e-01, 7.703620832988877009e-01, + 1.071311369641311830e+00, 1.450569758088998445e+00, 1.950778549520360334e+00, 2.640031773695551468e+00, + 3.631372373667412273e+00, 5.119915330903350570e+00, 7.456660981404883289e+00, 1.130226126889972624e+01, + 1.796410692472772550e+01, 3.017810704601898222e+01, 5.403875800312370567e+01, 1.041077314477469548e+02, + 2.180295201202628077e+02, 5.021556986259101646e+02, 1.288621310998222420e+03, 3.739216870800548324e+03, + 1.247507297020191232e+04, 4.876399753226692124e+04, 2.281456582219130122e+05, 1.308777960064843017e+06, + 9.460846634209664077e+06, 8.888831203637279622e+07, 1.124168828974344134e+09, 1.991276729532144470e+10, + 5.167434691060984650e+11, 2.067218814203990888e+13, 1.350615033184100406e+15, 1.538540662836508188e+17, + 3.290747290540350661e+19, 1.437291381884498816e+22, 1.409832445530347286e+25, 3.459135480277971441e+28, + 2.398720582340954092e+32, 5.398806604617292960e+36, 4.613340002580628610e+41, 1.787685909667902457e+47, + 3.841984370124338536e+53, 5.752797955708583700e+60, 7.771812038427286551e+68, 1.269673044204081626e+78, + 3.495676773765731568e+88, 2.362519474971692445e+100, 6.002143893273651123e+113, 9.290716303464155539e+128, + 1.514442238033847090e+146, }; + +__constant__ double m_abscissas_double_6[98] = + { 4.911510035029024930e-02, 1.480131496743607333e-01, 2.489388137406836857e-01, 3.533254236926684378e-01, + 4.627335566122353259e-01, 5.789120681640963067e-01, 7.038702533860627799e-01, 8.399658591446505688e-01, + 9.900150664244376147e-01, 1.157432570143699131e+00, 1.346412759185361763e+00, 1.562167113901335551e+00, + 1.811238852782323380e+00, 2.101924419006550301e+00, 2.444843885584197934e+00, 2.853720746632915024e+00, + 3.346458910955350787e+00, 3.946645821057838387e+00, 4.685673101596678529e+00, 5.605762230908151175e+00, + 6.764332336830574204e+00, 8.240383175379985221e+00, 1.014394356129857730e+01, 1.263024714338892472e+01, + 1.592130395780345258e+01, 2.033921861921857185e+01, 2.635846445760633752e+01, 3.468926333224152409e+01, + 4.641291467019728963e+01, 6.320550793890424203e+01, 8.771497261808906374e+01, 1.242096926240411498e+02, + 1.797186347845127557e+02, 2.660817283327900190e+02, 4.037273029575712841e+02, 6.288113066545908703e+02, + 1.007079837507490594e+03, 1.661568229185114288e+03, 2.829651440786582598e+03, 4.984386266585669139e+03, + 9.101546927647810893e+03, 1.726892655475049727e+04, 3.413099578778601190e+04, 7.045668977053092802e+04, + 1.523404217761279128e+05, 3.460479782897947414e+05, 8.284724209233183002e+05, 2.097596146601193946e+06, + 5.636950798861273236e+06, 1.614071410855607245e+07, 4.944730678915060360e+07, 1.627810516820991356e+08, + 5.785332971632280838e+08, 2.230838540681955690e+09, 9.382391306064739643e+09, 4.328149544776551692e+10, + 2.203072744049242904e+11, 1.245245067109136413e+12, 7.869000534957822375e+12, 5.599531432979422461e+13, + 4.521486949902090877e+14, 4.176889516548293265e+15, 4.452867759650496656e+16, 5.529142853140498068e+17, + 8.075732516562854275e+18, 1.402046916260468698e+20, 2.925791412832239850e+21, 7.426433029335410886e+22, + 2.321996331245735364e+24, 9.064194250638442432e+25, 4.481279048819445609e+27, 2.849046304726990645e+29, + 2.367381159183355975e+31, 2.615825578455121227e+33, 3.914764948263290808e+35, 8.092042448555929219e+37, + 2.358921320940630332e+40, 
9.915218648535332591e+42, 6.152851059342658764e+45, 5.780276340144515388e+48, + 8.443751734186488626e+51, 1.973343350899766708e+55, 7.605247378556219980e+58, 4.992057104939510418e+62, + 5.775863423903912316e+66, 1.221808201945355603e+71, 4.912917230387133816e+75, 3.913971813732202372e+80, + 6.456388069905286787e+85, 2.311225068528010358e+91, 1.887458157719431339e+97, 3.708483165438453094e+103, + 1.855198812283538635e+110, 2.509787873171705318e+117, 9.790423755591216617e+124, 1.179088807944050747e+133, + 4.714631846722476620e+141, 6.762657785959713240e+150, }; + +__constant__ double m_abscissas_double_7[196] = + { 2.454715583629863651e-02, 7.372466873903346224e-02, 1.231525309416766543e-01, 1.730001377719248556e-01, + 2.234406649596860001e-01, 2.746526549718518258e-01, 3.268216792980646669e-01, 3.801421009804789245e-01, + 4.348189637215614948e-01, 4.910700365099428407e-01, 5.491280459480215441e-01, 6.092431324382654397e-01, + 6.716855712021148069e-01, 7.367488049067938643e-01, 8.047528416336950644e-01, 8.760480802482050705e-01, + 9.510196351823332253e-01, 1.030092244532470067e+00, 1.113735859588680765e+00, 1.202472030918058876e+00, + 1.296881226496863751e+00, 1.397611241828373026e+00, 1.505386891360545205e+00, 1.621021205894798030e+00, + 1.745428403369044572e+00, 1.879638952031029331e+00, 2.024817107609328524e+00, 2.182281382147884181e+00, + 2.353528494823881355e+00, 2.540261468229626457e+00, 2.744422672171478111e+00, 2.968232787190606619e+00, + 3.214236869520657666e+00, 3.485358957907730467e+00, 3.784966983117372821e+00, 4.116950138940295100e+00, + 4.485811369388231710e+00, 4.896778246562001812e+00, 5.355936290826725948e+00, 5.870389762600956907e+00, + 6.448456189131117605e+00, 7.099902452679558236e+00, 7.836232253282841261e+00, 8.671037293575230635e+00, + 9.620427777985990363e+00, 1.070356198876799531e+01, 1.194330008139441022e+01, 1.336701421038499647e+01, + 1.500759615914396343e+01, 1.690471548203528376e+01, 1.910639668731689597e+01, 2.167100443216577994e+01, + 2.466975274695099197e+01, 2.818989025157845355e+01, 3.233876132429401745e+01, 3.724900758097245740e+01, + 4.308526084907741997e+01, 5.005279647654703975e+01, 5.840877607253876528e+01, 6.847692821534239862e+01, + 8.066681777060714848e+01, 9.549927270200249260e+01, 1.136401195769487885e+02, 1.359451944976603209e+02, + 1.635207451879744447e+02, 1.978049687912586950e+02, 2.406787535889776661e+02, 2.946170292930555023e+02, + 3.628969532147125333e+02, 4.498861782715596902e+02, 5.614447353133496106e+02, 7.054892470899271429e+02, + 8.927907732799964116e+02, 1.138111424979478376e+03, 1.461835991563605367e+03, 1.892332623444716186e+03, + 2.469396036186133479e+03, 3.249311569298824731e+03, 4.312367113170283012e+03, 5.774094754500139661e+03, + 7.802247237500851845e+03, 1.064267530975806972e+04, 1.465915383535674990e+04, 2.039528541239754835e+04, + 2.867170622421556265e+04, 4.074033762183453297e+04, 5.853182310596923393e+04, 8.505689265265206640e+04, + 1.250649269847856615e+05, 1.861373943166749766e+05, 2.805255777452010927e+05, 4.282782486084761748e+05, + 6.626340506127657304e+05, 1.039443239650339565e+06, 1.653857426112961316e+06, 2.670315650125279161e+06, + 4.377212026624358795e+06, 7.288071713698413821e+06, 1.233172993400331694e+07, 2.121557285769933699e+07, + 3.713086254861535383e+07, 6.614579377352135534e+07, 1.200055291694917110e+08, 2.218629410296880690e+08, + 4.182282939928687703e+08, 8.043704132493714804e+08, 1.579392989425668114e+09, 3.168122415524104635e+09, + 6.496606811549861323e+09, 1.362851988356444486e+10, 
2.926863897008707708e+10, 6.439798665209493735e+10, + 1.452755233772903022e+11, 3.362854459389246576e+11, 7.994202785433479271e+11, 1.953264233362291960e+12, + 4.909581868242554569e+12, 1.270622730765015610e+13, 3.389070986742985764e+13, 9.325084030208844833e+13, + 2.649489423834534140e+14, 7.781295184094957195e+14, 2.364715052527355639e+15, 7.444138031465958255e+15, + 2.430217240684749635e+16, 8.237068641534357762e+16, 2.902117050664548840e+17, 1.064157679404037013e+18, + 4.066277106061960017e+18, 1.621274233630359097e+19, 6.754156830915450013e+19, 2.944056841733781919e+20, + 1.344640139549107817e+21, 6.444586158944723300e+21, 3.246218667554608934e+22, 1.721234579556653533e+23, + 9.622533890240474391e+23, 5.681407260417956671e+24, 3.548890779995928184e+25, 2.349506425672269562e+26, + 1.651618130605205643e+27, 1.235147426493113059e+28, 9.845947239792057550e+28, 8.383130781984610418e+29, + 7.639649461399172445e+30, 7.467862732233885201e+31, 7.847691482004993660e+32, 8.886032557626454704e+33, + 1.086734890678302436e+35, 1.438967777036538458e+36, 2.068168865475603521e+37, 3.234885320223912385e+38, + 5.521233641542628514e+39, 1.031148231194663855e+41, 2.113272035816365982e+42, 4.766724345485077520e+43, + 1.186961550990218287e+45, 3.273172169205847573e+46, 1.002821226769167753e+48, 3.424933903935156479e+49, + 1.308436017026428736e+51, 5.611378330048420503e+52, 2.711424806327139291e+54, 1.481771793644066442e+56, + 9.194282071042778804e+57, 6.503661455875355562e+59, 5.266329986868627303e+61, 4.902662807969347359e+63, + 5.270511057289557050e+65, 6.572856511670583316e+67, 9.553956030013225387e+69, 1.626491911159411616e+72, + 3.259410915500951223e+74, 7.728460318113614280e+76, 2.179881996905918059e+79, 7.354484388371505915e+81, + 2.984831270803957746e+84, 1.465828267813438962e+87, 8.763355972629864261e+89, 6.417909665847831130e+92, + 5.794958649229893510e+95, 6.494224472311908365e+98, 9.095000156016433698e+101, 1.603058498455299102e+105, + 3.582099119119320529e+108, 1.022441227139854687e+112, 3.756872185015086057e+115, 1.791363463832849159e+119, + 1.117641882039472124e+123, 9.202159565546528285e+126, 1.008716474827888568e+131, 1.485546487089301805e+135, + 2.966961534830566097e+139, 8.114207284664369360e+143, 3.069178087507669739e+148, 1.622223681147791473e+153, }; + +__constant__ double m_abscissas_double_8[391] = + { 1.227227917054637830e-02, 3.682722894492590471e-02, 6.141337626871079991e-02, 8.605159708778207907e-02, + 1.107628840017845446e-01, 1.355683934957785482e-01, 1.604894937454335489e-01, 1.855478131645089496e-01, + 2.107652898670700524e-01, 2.361642222214626268e-01, 2.617673206785495261e-01, 2.875977610631342900e-01, + 3.136792395249035647e-01, 3.400360293536632770e-01, 3.666930398731810193e-01, 3.936758776386451797e-01, + 4.210109101746846268e-01, 4.487253325041450341e-01, 4.768472367324829462e-01, 5.054056849688209375e-01, + 5.344307858825229079e-01, 5.639537752137267134e-01, 5.940071005777549000e-01, 6.246245109268716053e-01, + 6.558411510586397969e-01, 6.876936615883514922e-01, 7.202202848338683401e-01, 7.534609770949572224e-01, + 7.874575278460963461e-01, 8.222536864020499377e-01, 8.578952966595825808e-01, 8.944304405668593009e-01, + 9.319095910247435485e-01, 9.703857749817920659e-01, 1.009914747547728584e+00, 1.050555178019083150e+00, + 1.092368848786092579e+00, 1.135420868172514300e+00, 1.179779898350424466e+00, 1.225518399571142610e+00, + 1.272712892062026473e+00, 1.321444237057985065e+00, 1.371797938567245953e+00, 1.423864467614384096e+00, + 1.477739610861208115e+00, 
1.533524845679288858e+00, 1.591327743938355098e+00, 1.651262406984310076e+00, + 1.713449934511288211e+00, 1.778018930286256858e+00, 1.845106047964720870e+00, 1.914856580544951899e+00, + 1.987425097349017093e+00, 2.062976132795275283e+00, 2.141684931642916785e+00, 2.223738255848994521e+00, + 2.309335258687213796e+00, 2.398688432341103821e+00, 2.492024635808356095e+00, 2.589586210645122756e+00, + 2.691632192846832444e+00, 2.798439630014497291e+00, 2.910305013902562652e+00, 3.027545839497364963e+00, + 3.150502302946919722e+00, 3.279539151967394330e+00, 3.415047703805410611e+00, 3.557448047456550733e+00, + 3.707191448649779817e+00, 3.864762978128342125e+00, 4.030684386016531344e+00, 4.205517247588613835e+00, + 4.389866408585172458e+00, 4.584383761391930748e+00, 4.789772386950687695e+00, 5.006791101261363264e+00, + 5.236259449815274050e+00, 5.479063198337523150e+00, 5.736160373884817415e+00, 6.008587916728619858e+00, + 6.297469010648863048e+00, 6.604021167380929133e+00, 6.929565150124677837e+00, 7.275534831383860972e+00, + 7.643488092123492064e+00, 8.035118882502459288e+00, 8.452270579478188130e+00, 8.896950793641785313e+00, + 9.371347797016395173e+00, 9.877848765573446033e+00, 1.041906005527762037e+01, 1.099782975900831706e+01, + 1.161727282423952258e+01, 1.228079904848924611e+01, 1.299214431196691048e+01, 1.375540545535625881e+01, + 1.457507926620621316e+01, 1.545610610104852468e+01, 1.640391874338302925e+01, 1.742449718154208970e+01, + 1.852443008688437526e+01, 1.971098388378266494e+01, 2.099218043080961648e+01, 2.237688448013982946e+01, + 2.387490225270073820e+01, 2.549709266380430464e+01, 2.725549296232531555e+01, 2.916346081119624987e+01, + 3.123583514423284962e+01, 3.348911849136805118e+01, 3.594168387985465099e+01, 3.861400990307230737e+01, + 4.152894811329303023e+01, 4.471202755441533396e+01, 4.819180202224910174e+01, 5.200024654361558757e+01, + 5.617321062537384494e+01, 6.075093706918782079e+01, 6.577865661168003966e+01, 7.130727037357721343e+01, + 7.739413413465805794e+01, 8.410396085269633392e+01, 9.150986068496734448e+01, 9.969454113547704016e+01, + 1.087516939426018897e+02, 1.187876000643037532e+02, 1.299229897614516371e+02, 1.422952015056372537e+02, + 1.560606914665002671e+02, 1.713979549326432406e+02, 1.885109325154830073e+02, 2.076329877740125935e+02, + 2.290315594654587370e+02, 2.530136115655676467e+02, 2.799320282398896912e+02, 3.101931299766730890e+02, + 3.442655222107529892e+02, 3.826905303289378387e+02, 4.260945266207607701e+02, 4.752035175892902045e+02, + 5.308604366239058864e+02, 5.940456805372995009e+02, 6.659015428338778262e+02, 7.477613367309153870e+02, + 8.411841730471343023e+02, 9.479965698013741524e+02, 1.070342331375881840e+03, 1.210742457518582660e+03, + 1.372167241552205820e+03, 1.558123212187692722e+03, 1.772758188662716282e+03, 2.020988485411862984e+03, + 2.308653259329163157e+03, 2.642702189813684273e+03, 3.031424182869210212e+03, 3.484726676985756018e+03, + 4.014477504733973505e+03, 4.634924264049394751e+03, 5.363209949773439749e+03, 6.220008412114342803e+03, + 7.230309332853029956e+03, 8.424390216735217783e+03, 9.839022871538541787e+03, 1.151897463083113988e+04, + 1.351888098874374202e+04, 1.590558745460066947e+04, 1.876108572764816176e+04, 2.218620462393366275e+04, + 2.630526205054915357e+04, 3.127194401941711057e+04, 3.727675461256652923e+04, 4.455648280312273249e+04, + 5.340626592018903930e+04, 6.419500580388918123e+04, 7.738512642386820060e+04, 9.355796993981725963e+04, + 1.134465375820669470e+05, 1.379778272209741713e+05, 1.683277485807887053e+05, 
2.059925746120735305e+05, + 2.528822024503158254e+05, 3.114422718347725915e+05, 3.848145913435570736e+05, 4.770485864966822643e+05, + 5.933809324724740854e+05, 7.406066190351666115e+05, 9.275730471470643372e+05, 1.165840260940180415e+06, + 1.470566322118246135e+06, 1.861698899014921971e+06, 2.365584870298354495e+06, 3.017152695505764877e+06, + 3.862882573599929249e+06, 4.964864305589750358e+06, 6.406362829959736606e+06, 8.299481847261302115e+06, + 1.079575892642401854e+07, 1.410087327474604091e+07, 1.849514724418250100e+07, 2.436224419670805500e+07, + 3.222951131863941234e+07, 4.282493882385925337e+07, 5.715793394339267637e+07, 7.663437932745451635e+07, + 1.032212725498489699e+08, 1.396833991976194842e+08, 1.899251497664892740e+08, 2.594865396467505851e+08, + 3.562664742464501497e+08, 4.915825413172413471e+08, 6.817316470116958142e+08, 9.502998105202541438e+08, + 1.331598295343277538e+09, 1.875801976010459831e+09, 2.656673907709731487e+09, 3.783240215616365909e+09, + 5.417531848500136979e+09, 7.801695369892847510e+09, 1.129965368955098833e+10, 1.646149161390821924e+10, + 2.412353995736687694e+10, 3.556486895431927094e+10, 5.275345014093760519e+10, 7.873572108325378177e+10, + 1.182569020317863604e+11, 1.787549442508363461e+11, 2.719633064979986142e+11, 4.165122153119897946e+11, + 6.421781858205134197e+11, 9.968725497576275918e+11, 1.558212327122960399e+12, 2.452809984907093786e+12, + 3.888656232828140210e+12, 6.209868990509424909e+12, 9.989924216297983665e+12, 1.619158001378611351e+13, + 2.644324518669926559e+13, 4.352018847904374786e+13, 7.218884688202741709e+13, 1.206997640727349538e+14, + 2.034483722445207402e+14, 3.457553102874402920e+14, 5.925248511957505706e+14, 1.024057793713038672e+15, + 1.785174045941642162e+15, 3.139306988668494696e+15, 5.569856270174890128e+15, 9.971763353834460328e+15, + 1.801687491114883092e+16, 3.285709858322565542e+16, 6.049018540910759710e+16, 1.124375283211369572e+17, + 2.110445125952435305e+17, 4.000737007891229992e+17, 7.660849361564329309e+17, 1.482018770996176700e+18, + 2.896945433910857945e+18, 5.722790165693470493e+18, 1.142689960439921462e+19, 2.306616559984106723e+19, + 4.707857184616093863e+19, 9.717346347495342813e+19, 2.028735605622585444e+20, 4.284840254171000581e+20, + 9.157027329021623836e+20, 1.980457834766411777e+21, 4.335604886702252004e+21, 9.609258559714223995e+21, + 2.156604630608586997e+22, 4.902045909695270289e+22, 1.128749227121328467e+23, 2.633414623049930879e+23, + 6.226335684490998543e+23, 1.492205279014148921e+24, 3.625768249717590109e+24, 8.933899764961444882e+24, + 2.232786981682262383e+25, 5.661295336293986732e+25, 1.456616710298133142e+26, 3.803959852868488245e+26, + 1.008531585603036490e+27, 2.715247425129423358e+27, 7.425071766766651967e+27, 2.062860712173225003e+28, + 5.824055458799413312e+28, 1.671388836696436644e+29, 4.876830632023956392e+29, 1.447170071146107156e+30, + 4.368562208925583783e+30, 1.341873806249251338e+31, 4.195251632754338682e+31, 1.335360134828214136e+32, + 4.328681350715136340e+32, 1.429401866150319186e+33, 4.809736146227180696e+33, 1.649624114567602575e+34, + 5.768677492419801469e+34, 2.057442854162761350e+35, 7.486423509917811063e+35, 2.780052791791155051e+36, + 1.053908347660081874e+37, 4.080046334235754223e+37, 1.613553311592805373e+38, 6.520836332997615098e+38, + 2.693848186257510992e+39, 1.138002408430710800e+40, 4.917748008813924613e+40, 2.174691073191358676e+41, + 9.844523745430526502e+41, 4.563707467590116732e+42, 2.167352073708379137e+43, 1.054860193887170754e+44, + 5.263588225566847365e+44, 
2.693772458797916623e+45, 1.414506760560163074e+46, 7.624126763512016620e+46, + 4.219828148762794411e+47, 2.399387665831793264e+48, 1.402139947254117434e+49, 8.424706325525422943e+49, + 5.206918479942619318e+50, 3.311787866477716151e+51, 2.168683295509859155e+52, 1.462786368779206713e+53, + 1.016761784575838363e+54, 7.286460995145043184e+54, 5.386194237448865407e+55, 4.108917480528740640e+56, + 3.236445625945552728e+57, 2.633440652417619669e+58, 2.214702339357939268e+59, 1.926058995948268392e+60, + 1.733067740414174932e+61, 1.614307160124426969e+62, 1.557464328486352138e+63, 1.557226155197192031e+64, + 1.614473962707995344e+65, 1.736617406327386105e+66, 1.939201243451190521e+67, 2.249277732936622876e+68, + 2.711593798719765599e+69, 3.399628732048687119e+70, 4.435389696730206291e+71, 6.025566076164003981e+72, + 8.529161425383779849e+73, 1.258746322992988688e+75, 1.938112175186560210e+76, 3.115432363572610661e+77, + 5.231797674434390018e+78, 9.184930207860680757e+79, 1.686929404780378772e+81, 3.243565624474232635e+82, + 6.533812498930220075e+83, 1.379898823144620314e+85, 3.057650444842839916e+86, 7.114050545839171245e+87, + 1.739275024442258674e+89, 4.471782915853177804e+90, 1.210036789494028144e+92, 3.448828044590862359e+93, + 1.036226783750561565e+95, 3.284801914751206038e+96, 1.099514933602224638e+98, 3.889581731378242597e+99, + 1.455434287901069991e+101, 5.765729934387419019e+102, 2.420349568745475582e+104, 1.077606625929777536e+106, + 5.093346988695851845e+107, 2.558090824110323997e+109, 1.366512508719047964e+111, 7.771735800763526406e+112, + 4.710398638793014918e+114, 3.045563885587013954e+116, 2.102762552861442993e+118, 1.551937536212596136e+120, + 1.225676354426075970e+122, 1.036950946169703711e+124, 9.407885268970827717e+125, 9.163369107785093171e+127, + 9.592531095671168926e+129, 1.080486293361823875e+132, 1.311034829557782450e+134, 1.715642975932639188e+136, + 2.424231742707881878e+138, 3.703231223333127919e+140, 6.123225027409988902e+142, 1.097271040771196765e+145, + 2.133693643241295977e+147, 4.508099184895777328e+149, 1.036252806686291189e+152, }; + +__constant__ double* m_abscissas_double[8] = { + m_abscissas_double_1, + m_abscissas_double_2, + m_abscissas_double_3, + m_abscissas_double_4, + m_abscissas_double_5, + m_abscissas_double_6, + m_abscissas_double_7, + m_abscissas_double_8, +}; + +__constant__ double m_weights_double_1[6] = + { 7.868241604839621507e+00, 8.805163880733011116e+02, 5.396278323520705668e+07, 8.876511896968161317e+19, + 2.432791879269225553e+52, 6.399713512080202911e+139, }; + +__constant__ double m_weights_double_2[6] = + { 2.398524276302635218e+00, 5.244596423726681022e+01, 6.457887819598201760e+04, 2.509985242511374506e+12, + 1.774029269327138701e+32, 2.781406115983097314e+85, }; + +__constant__ double m_weights_double_3[12] = + { 1.749369583108386852e+00, 3.979658981934607813e+00, 1.848514598574449570e+01, 1.864880718932067988e+02, + 5.974205695263265855e+03, 1.270412635144623341e+06, 6.164193014295984071e+09, 5.230850031811222530e+15, + 2.226260929943369774e+25, 1.199931102042181592e+41, 7.470602144275146214e+66, 1.814465860528410676e+109, }; + +__constant__ double m_weights_double_4[24] = + { 1.613859062188366173e+00, 1.997767291869673262e+00, 3.020231979908834220e+00, 5.477641843859057761e+00, + 1.179660916492671672e+01, 3.035504848518598294e+01, 9.584421793794920860e+01, 3.893870238229992076e+02, + 2.179193250357911344e+03, 1.839208123964132852e+04, 2.632120612599856167e+05, 7.427296507169468210e+06, + 5.015875648341232356e+08, 
1.039610867241544113e+11, 9.100328911818091977e+13, 5.068651163890231571e+17, + 3.039966520714902616e+22, 3.857740194672007962e+28, 2.465542763666581087e+36, 2.416439449167799461e+46, + 1.517091553926604149e+59, 3.825043412021411380e+75, 4.089582396821598640e+96, 3.823775894295564050e+123, }; + +__constant__ double m_weights_double_5[49] = + { 1.581465959536694744e+00, 1.669149910438534746e+00, 1.857523188595005770e+00, 2.175662623626994120e+00, + 2.675901375211020564e+00, 3.447738682498791744e+00, 4.643946540355464126e+00, 6.530204496574248616e+00, + 9.582285015566804961e+00, 1.468361407515440960e+01, 2.354449548740987533e+01, 3.963527273305166705e+01, + 7.037635206267538547e+01, 1.325880124784838868e+02, 2.669625649541569172e+02, 5.793749198508472676e+02, + 1.368691928321303605e+03, 3.559435721533130554e+03, 1.032186677270763318e+04, 3.386621302858741487e+04, + 1.278166259840246830e+05, 5.654082513926693098e+05, 2.994462044781721833e+06, 1.944975023421914947e+07, + 1.592193007690560588e+08, 1.694288818617459913e+09, 2.427156182311303271e+10, 4.870317848199455490e+11, + 1.431819656229181793e+13, 6.489471523099301256e+14, 4.803757752508989106e+16, 6.200096361305331541e+18, + 1.502568562439914899e+21, 7.436061367189688251e+23, 8.264761218677928603e+26, 2.297735027897804345e+30, + 1.805449779569534997e+34, 4.604472360199061931e+38, 4.458371212030626854e+43, 1.957638261114809309e+49, + 4.767368137162500764e+55, 8.088820139476721285e+62, 1.238260897349286357e+71, 2.292272505278842062e+80, + 7.151392373749193549e+90, 5.476714850156044431e+102, 1.576655618370700681e+116, 2.765448595957851958e+131, + 5.108051255283132673e+148, }; + +__constant__ double m_weights_double_6[98] = + { 1.573457773573108386e+00, 1.594892755038663787e+00, 1.638536515530234742e+00, 1.705980408212213620e+00, + 1.799724394608737275e+00, 1.923322854425656307e+00, 2.081597373313268178e+00, 2.280934883790070511e+00, + 2.529697852387704655e+00, 2.838784782552951185e+00, 3.222395745020980612e+00, 3.699081358854235112e+00, + 4.293188274330526800e+00, 5.036865356322330076e+00, 5.972871140910932199e+00, 7.158538424311077564e+00, + 8.671427800892076385e+00, 1.061747360297922326e+01, 1.314285002260235600e+01, 1.645145625668428040e+01, + 2.083099449998189069e+01, 2.669235989791640190e+01, 3.462993514791378189e+01, 4.551518362653662579e+01, + 6.064408087764392116e+01, 8.197296917485846798e+01, 1.125020468081652564e+02, 1.569096552844714123e+02, + 2.226204347868638276e+02, 3.216385489504077755e+02, 4.737574505945461739e+02, 7.122994548146997637e+02, + 1.094609652686376553e+03, 1.721697789176049576e+03, 2.775924909253835146e+03, 4.595230066268149347e+03, + 7.823427586641573672e+03, 1.372357435269105405e+04, 2.485188961645119553e+04, 4.655538745425972783e+04, + 9.041766782135686884e+04, 1.824843964862728392e+05, 3.836800264094614027e+05, 8.426271970245168026e+05, + 1.938432574158782634e+06, 4.685112849356485528e+06, 1.193528667218607927e+07, 3.215643752247989316e+07, + 9.196008928386600386e+07, 2.802223178457559964e+08, 9.136110825267458886e+08, 3.200910900783148591e+09, + 1.210765264234723689e+10, 4.969024745093101808e+10, 2.224315751863855216e+11, 1.092125344449313660e+12, + 5.916882980019919359e+12, 3.559743438494577249e+13, 2.394353652945465191e+14, 1.813551073517501917e+15, + 1.558736706166165738e+16, 1.532714875555114333e+17, 1.739274776190789212e+18, 2.298841216802216313e+19, + 3.574030698837762664e+20, 6.604899705451419080e+21, 1.467155879591820659e+23, 3.964094964398509381e+24, + 1.319342840595348793e+26, 
5.482251971340400742e+27, 2.885137894723827518e+29, 1.952539840765392110e+31, + 1.727051489032222797e+33, 2.031343507095439396e+35, 3.236074146972599980e+37, 7.120487412983497200e+39, + 2.209552707411017265e+42, 9.886282647791384648e+44, 6.530514048788273529e+47, 6.530706672481546528e+50, + 1.015518807431281951e+54, 2.526366773162394510e+57, 1.036450519906790297e+61, 7.241966032627135861e+64, + 8.919402520769714938e+68, 2.008463619152992905e+73, 8.596914764830260020e+77, 7.290599546829495220e+82, + 1.280199563216419112e+88, 4.878349285603201150e+93, 4.240828248064127940e+99, 8.869771764721598720e+105, + 4.723342575741417669e+112, 6.802035963326188581e+119, 2.824531180990009549e+127, 3.621049216745982252e+135, + 1.541270150334942520e+144, 2.353376995174362785e+153, }; + +__constant__ double m_weights_double_7[196] = + { 1.571461316550783294e+00, 1.576790166316938345e+00, 1.587495640370383316e+00, 1.603673956341370210e+00, + 1.625471125457493943e+00, 1.653085011915939302e+00, 1.686768142525911236e+00, 1.726831323537516202e+00, + 1.773648138667236602e+00, 1.827660421478661448e+00, 1.889384817044018196e+00, 1.959420572855037091e+00, + 2.038458728047908923e+00, 2.127292904083847225e+00, 2.226831940199076941e+00, 2.338114664555130296e+00, + 2.462327148722991304e+00, 2.600822860927085164e+00, 2.755146214814554359e+00, 2.927060108424483555e+00, + 3.118578166240921951e+00, 3.332002540339506630e+00, 3.569968300410740276e+00, 3.835495653996447262e+00, + 4.132051496512934885e+00, 4.463622106699067881e+00, 4.834799191008006557e+00, 5.250881957765679608e+00, + 5.717998490875333124e+00, 6.243250421598568105e+00, 6.834885801226541839e+00, 7.502506202789340802e+00, + 8.257315484493544201e+00, 9.112419405864642634e+00, 1.008318749543997758e+01, 1.118769134993865202e+01, + 1.244723705914106881e+01, 1.388701390605507587e+01, 1.553688715915900190e+01, 1.743237000680942831e+01, + 1.961581894823993424e+01, 2.213790886354273806e+01, 2.505945934677137610e+01, 2.845370377742137561e+01, + 3.240911845969524834e+01, 3.703296289480230161e+01, 4.245572644746267911e+01, 4.883673480337985582e+01, + 5.637124640586975420e+01, 6.529947092752610340e+01, 7.591807755694122837e+01, 8.859494252391663822e+01, + 1.037881295005788124e+02, 1.220704263969226746e+02, 1.441612098131200535e+02, 1.709680191245773511e+02, + 2.036410593843575570e+02, 2.436450058708723643e+02, 2.928540812182076105e+02, 3.536786019152253392e+02, + 4.292343083967296939e+02, 5.235701840488733027e+02, 6.419766898003024575e+02, 7.914052083668759283e+02, + 9.810422089081931637e+02, 1.223099994999740393e+03, 1.533912555427112127e+03, 1.935464013605830339e+03, + 2.457534549912886852e+03, 3.140733731623635519e+03, 4.040818188564651898e+03, 5.234881599712225681e+03, + 6.830294457607329226e+03, 8.977713228649887143e+03, 1.189015920967326839e+04, 1.587122387044346962e+04, + 2.135711106445789331e+04, 2.897983705189681437e+04, 3.966306726795547950e+04, 5.476875193750000787e+04, + 7.632356539388055680e+04, 1.073719149754976951e+05, 1.525316674555574152e+05, 2.188778434744216586e+05, + 3.173624496019295608e+05, 4.651201525869328462e+05, 6.892537656280580572e+05, 1.033119885120019982e+06, + 1.566887981043252499e+06, 2.405492027026531795e+06, 3.739528964815910340e+06, 5.889121154895580032e+06, + 9.399046351922342030e+06, 1.520903276129653518e+07, 2.496287187293576168e+07, 4.157759259963074840e+07, + 7.030705366950267312e+07, 1.207598558452493366e+08, 2.107882509464846833e+08, 3.741047199023457864e+08, + 6.754494594987415572e+08, 1.241316740415880537e+09, 
2.323310032649552862e+09, 4.431176019026625759e+09, + 8.617446487400900130e+09, 1.709836906604031513e+10, 3.463574521880171339e+10, 7.167607123799270726e+10, + 1.516347620910054079e+11, 3.281729323238950526e+11, 7.271102600298280790e+11, 1.650499552378780378e+12, + 3.841338149508803917e+12, 9.173744267785176575e+12, 2.249901946357519979e+13, 5.671535089900611731e+13, + 1.470742250307697019e+14, 3.927012518464311775e+14, 1.080639977391212820e+15, 3.067671466720475189e+15, + 8.992386789198328428e+15, 2.724722536524592111e+16, 8.542946122263389258e+16, 2.774613718725574755e+17, + 9.345299479382029121e+17, 3.267996122987731882e+18, 1.187914433455468315e+19, 4.494053408418564214e+19, + 1.771706652195486743e+20, 7.288102552885931527e+20, 3.132512430816625349e+21, 1.408743767951073110e+22, + 6.638294268236060414e+22, 3.282543608403565013e+23, 1.705920098038394064e+24, 9.332259385148524285e+24, + 5.382727175874888312e+25, 3.278954235122093249e+26, 2.113191697957458099e+27, 1.443411041499643040e+28, + 1.046864394654982423e+29, 8.077319226958905700e+29, 6.643146963432616277e+30, 5.835670121359986260e+31, + 5.486890296790230798e+32, 5.533726968508261614e+33, 5.999734996418352834e+34, 7.009176119466122569e+35, + 8.844061966424597499e+36, 1.208226860869605961e+38, 1.791648514311063338e+39, 2.891313916713205762e+40, + 5.091457860211527298e+41, 9.810630588402496553e+42, 2.074441239147378860e+44, 4.827650116937700540e+45, + 1.240287939111549029e+47, 3.528782858644784616e+48, 1.115449490471696659e+50, 3.930510643328196314e+51, + 1.549243712957852337e+53, 6.854998238041301002e+54, 3.417479961583207704e+56, 1.926905498641079990e+58, + 1.233580963004919450e+60, 9.002819902898076915e+61, 7.521415141253441645e+63, 7.224277554900578993e+65, + 8.012832830535078610e+67, 1.030999620286380369e+70, 1.546174957076748679e+72, 2.715803772613248694e+74, + 5.615089920571746438e+76, 1.373667859345343337e+79, 3.997541020769625126e+81, 1.391500589339800087e+84, + 5.826693844912022892e+86, 2.952274820929549096e+89, 1.821023061478466282e+92, 1.375973022137941526e+95, + 1.281852367543412945e+98, 1.482130127201990503e+101, 2.141574273792435314e+104, 3.894495540947112380e+107, + 8.978646362580102961e+110, 2.644131589807244050e+114, 1.002403539841913834e+118, 4.931412804903905259e+121, + 3.174401112435865044e+125, 2.696624001761892390e+129, 3.049799322320447166e+133, 4.634041526818687785e+137, + 9.548983134803106512e+141, 2.694404866192089829e+146, 1.051502720036395325e+151, 5.734170640626244955e+155, }; + +__constant__ double m_weights_double_8[391] = + { 1.570962550997832611e+00, 1.572292902367211961e+00, 1.574956581912666755e+00, 1.578959553636163985e+00, + 1.584310789563614305e+00, 1.591022301117035107e+00, 1.599109181186160337e+00, 1.608589657109067468e+00, + 1.619485154826419743e+00, 1.631820374530739318e+00, 1.645623378191125679e+00, 1.660925689395424109e+00, + 1.677762406016463717e+00, 1.696172326277082973e+00, 1.716198088860732467e+00, 1.737886327791014562e+00, + 1.761287842885152410e+00, 1.786457786673686420e+00, 1.813455868772335587e+00, 1.842346578792652542e+00, + 1.873199428986627521e+00, 1.906089217937612619e+00, 1.941096316736779451e+00, 1.978306979221816566e+00, + 2.017813678003844337e+00, 2.059715468170813895e+00, 2.104118380732327493e+00, 2.151135848063375554e+00, + 2.200889163814591418e+00, 2.253507979986114202e+00, 2.309130844113053375e+00, 2.367905779785113334e+00, + 2.429990914023652954e+00, 2.495555155369085590e+00, 2.564778926893134514e+00, 2.637854958747451684e+00, + 2.714989145296268067e+00, 
2.796401472360280536e+00, 2.882327020626578700e+00, 2.973017051860293803e+00, + 3.068740185193628238e+00, 3.169783671473487386e+00, 3.276454774427328601e+00, 3.389082268266156098e+00, + 3.508018062292869136e+00, 3.633638964133530274e+00, 3.766348594369884204e+00, 3.906579466636309289e+00, + 4.054795248667541120e+00, 4.211493221360917802e+00, 4.377206954666462219e+00, 4.552509221059946388e+00, + 4.738015169510782826e+00, 4.934385785253587887e+00, 5.142331663338191074e+00, 5.362617126899976224e+00, + 5.596064724397100194e+00, 5.843560143744373307e+00, 6.106057585381734693e+00, 6.384585640900671436e+00, + 6.680253728973824449e+00, 6.994259146058412709e+00, 7.327894795748901060e+00, 7.682557667824588764e+00, + 8.059758146071137270e+00, 8.461130232962342889e+00, 8.888442789395671080e+00, 9.343611899025485155e+00, + 9.828714479494622022e+00, 1.034600327721380625e+01, 1.089792339849122916e+01, 1.148713054801325790e+01, + 1.211651116619788555e+01, 1.278920468010096321e+01, 1.350862810871281096e+01, 1.427850329305334421e+01, + 1.510288705493181327e+01, 1.598620462612703196e+01, 1.693328673269081128e+01, 1.794941076780000506e+01, + 1.904034654190823159e+01, 2.021240716182964334e+01, 2.147250566192247370e+01, 2.282821809199713505e+01, + 2.428785385941680425e+01, 2.586053422878117785e+01, 2.755628000354674426e+01, 2.938610955221109564e+01, + 3.136214849990951329e+01, 3.349775258749912582e+01, 3.580764540799625468e+01, 3.830807296872530167e+01, + 4.101697730155473447e+01, 4.395419165876113623e+01, 4.714166019494196927e+01, 5.060368545366659226e+01, + 5.436720746019445252e+01, 5.846211877912138439e+01, 6.292162054058128784e+01, 6.778262518512416663e+01, + 7.308621254265223015e+01, 7.887814686488147292e+01, 8.520946359734658334e+01, 9.213713603387774717e+01, + 9.972483357670754649e+01, 1.080437851679046426e+02, 1.171737636088621692e+02, 1.272042089988687372e+02, + 1.382355124664102373e+02, 1.503804848151483311e+02, 1.637660387526102742e+02, 1.785351181233383403e+02, + 1.948489131607280604e+02, 2.128894073598352670e+02, 2.328623093447990790e+02, 2.550004322843281994e+02, + 2.795675942672445782e+02, 3.068631259124280934e+02, 3.372270867451200874e+02, 3.710463099965576255e+02, + 4.087614170466174911e+02, 4.508749684194593670e+02, 4.979609488959773491e+02, 5.506758209385785877e+02, + 6.097714244663179092e+02, 6.761100535726473685e+02, 7.506821038741422446e+02, 8.346267600518081192e+02, + 9.292562845315541998e+02, 1.036084578498234728e+03, 1.156860819661897657e+03, 1.293609142453808600e+03, + 1.448675521854205144e+03, 1.624783259532197615e+03, 1.825098759915318560e+03, 2.053309635972617554e+03, + 2.313717614494777200e+03, 2.611349236640186999e+03, 2.952087994093624299e+03, 3.342832332560548180e+03, + 3.791684927756595099e+03, 4.308179838716318955e+03, 4.903555624570201673e+03, 5.591084343634811452e+03, + 6.386468625571246341e+03, 7.308321829412979440e+03, 8.378749812799703561e+03, 9.624057218749638059e+03, + 1.107560666191146008e+04, 1.277086605445904388e+04, 1.475468792019489452e+04, 1.708087537417066343e+04, + 1.981410309695485051e+04, 2.303227888204754908e+04, 2.682945317928632535e+04, 3.131941178398428200e+04, + 3.664012209706997997e+04, 4.295924836668690170e+04, 5.048100882639843572e+04, 5.945472133180055290e+04, + 7.018547875172689579e+04, 8.304751726175694003e+04, 9.850099805053575446e+04, 1.171131266261766060e+05, + 1.395847982160589845e+05, 1.667843016393077556e+05, 1.997900626520524686e+05, 2.399449946032992187e+05, + 2.889257939838013232e+05, 3.488315309194304548e+05, 4.222972201496778447e+05, 
5.126398246369253619e+05, + 6.240464876221989792e+05, 7.618179073233615941e+05, 9.326839300224119257e+05, 1.145214007774297539e+06, + 1.410352646274233119e+06, 1.742120041875863385e+06, 2.158531716934287014e+06, 2.682809410126426731e+06, + 3.344980563595418861e+06, 4.183997972337706048e+06, 5.250558008165501752e+06, 6.610860174141680988e+06, + 8.351639423967558693e+06, 1.058692532393929900e+07, 1.346715235106239409e+07, 1.719148271024263021e+07, + 2.202453449027701694e+07, 2.831917301724337797e+07, 3.654767820268344932e+07, 4.734452657230626106e+07, + 6.156534063509513873e+07, 8.036843026897869248e+07, 1.053280284359690289e+08, 1.385921689084126286e+08, + 1.831036985925683524e+08, 2.429109457458640820e+08, 3.236062393759667463e+08, 4.329475218599986663e+08, + 5.817432967962929479e+08, 7.851179789388191786e+08, 1.064329197627075307e+09, 1.449389582912945485e+09, + 1.982866469377991849e+09, 2.725414314698094324e+09, 3.763867964111621444e+09, 5.223138814950990937e+09, + 7.283785810644397704e+09, 1.020809642381158743e+10, 1.437899318470510521e+10, 2.035836812543633578e+10, + 2.897499827080027444e+10, 4.145773751645494878e+10, 5.963837683872426287e+10, 8.626228483915530800e+10, + 1.254667045389825180e+11, 1.835212982264913186e+11, 2.699812207400151604e+11, 3.994928452151922954e+11, + 5.946380558701434550e+11, 8.904409967424091107e+11, 1.341551941677775838e+12, 2.033768550332151892e+12, + 3.102627959875753214e+12, 4.763598321705862063e+12, 7.361420360560813584e+12, 1.145126961456557423e+13, + 1.793314186996273926e+13, 2.827585501285792232e+13, 4.489297053678444669e+13, 7.177802872658499571e+13, + 1.155855098545820625e+14, 1.874833886367883093e+14, 3.063510356402174454e+14, 5.043400653005970242e+14, + 8.366163396892429890e+14, 1.398556351640947289e+15, 2.356335749516164682e+15, 4.001765167382637456e+15, + 6.851375128404941445e+15, 1.182690111761543990e+16, 2.058673527013806443e+16, 3.613968784314904633e+16, + 6.399112184394213551e+16, 1.143016185628376923e+17, 2.059881383915666443e+17, 3.745846788353680914e+17, + 6.874443034683149068e+17, 1.273407643613485314e+18, 2.381241916829895366e+18, 4.495835617307108399e+18, + 8.571442024901952701e+18, 1.650443584181656965e+19, 3.210100352421317851e+19, 6.307780124442703091e+19, + 1.252404031157661279e+20, 2.513005295649985394e+20, 5.096776255690838436e+20, 1.045019200016673046e+21, + 2.166476479260878466e+21, 4.542138145678395463e+21, 9.632082324449137128e+21, 2.066386536688254528e+22, + 4.485529785554428251e+22, 9.853879573610977508e+22, 2.191158874464374408e+23, 4.932835964390971668e+23, + 1.124501529971774363e+24, 2.596269136156756008e+24, 6.072292938313625501e+24, 1.438989066308003836e+25, + 3.455841956406570469e+25, 8.412655191713576490e+25, 2.076289061650816510e+26, 5.196515024640220322e+26, + 1.319173194089644043e+27, 3.397455895980380794e+27, 8.879057454438503591e+27, 2.355272361492064126e+28, + 6.342762007722624824e+28, 1.734531093990859705e+29, 4.817893170606830871e+29, 1.359597346490148232e+30, + 3.898969689906500392e+30, 1.136542986529989936e+31, 3.368450043991780017e+31, 1.015304084709817260e+32, + 3.113144376221918237e+32, 9.713072739730140403e+32, 3.084517643581725946e+33, 9.972682139820497284e+33, + 3.283625052288491586e+34, 1.101378785390827536e+35, 3.764333367592714297e+35, 1.311403465938242926e+36, + 4.658135710682813672e+36, 1.687517347470511392e+37, 6.237053685018323490e+37, 2.352571314427744869e+38, + 9.058938240219699936e+38, 3.562249097611136071e+39, 1.430959291578558210e+40, 5.873974584984375049e+40, + 2.464828549811283787e+41, 
1.057649203090855628e+42, 4.642475639281078035e+42, 2.085287118272421779e+43,
+      9.588439985186632177e+43, 4.514982011246092280e+44, 2.177974048341973204e+45, 1.076720976822900458e+46,
+      5.457267432929085589e+46, 2.836869270455781134e+47, 1.513103201392011626e+48, 8.283974667225617075e+48,
+      4.657239491995971344e+49, 2.689796370712836937e+50, 1.596597846911970388e+51, 9.744154538256586629e+51,
+      6.117238394843313065e+52, 3.952049650585241827e+53, 2.628701592074258213e+54, 1.800990196502679393e+55,
+      1.271554462563068383e+56, 9.255880104477760711e+56, 6.949737920133919393e+57, 5.385167200769965621e+58,
+      4.308493668102978774e+59, 3.560951557542178371e+60, 3.041888528384649992e+61, 2.687094441930837189e+62,
+      2.455920538900000855e+63, 2.323648254168641537e+64, 2.277129741584892331e+65, 2.312633552913224734e+66,
+      2.435407592981291129e+67, 2.660910388822465246e+68, 3.018105943423533920e+69, 3.555823489510192503e+70,
+      4.354188877793849013e+71, 5.544975795511813315e+72, 7.348276481909886336e+73, 1.013998025722423261e+75,
+      1.457911462244607943e+76, 2.185488876819505295e+77, 3.418022153286623008e+78, 5.580843920601835728e+79,
+      9.519586502799733908e+80, 1.697573578247197786e+82, 3.166906670990180014e+83, 6.185099106418675430e+84,
+      1.265541134386934377e+86, 2.714828965877756899e+87, 6.110386802964494082e+88, 1.444054086171083239e+90,
+      3.586083726638388165e+91, 9.365231868063239600e+92, 2.574080116205122449e+94, 7.452134689862302719e+95,
+      2.274309903836169819e+97, 7.323011134121164749e+98, 2.489816421737932462e+100, 8.946533386359281588e+101,
+      3.400401372391165979e+103, 1.368288186208928217e+105, 5.834277489829591931e+106, 2.638486937672383424e+108,
+      1.266728882767139521e+110, 6.462225178314182803e+111, 3.506432320607573604e+113, 2.025608933943268165e+115,
+      1.247041677084784707e+117, 8.189865188405279038e+118, 5.743610894406099965e+120, 4.305808934084489763e+122,
+      3.454156966079496755e+124, 2.968316601530352737e+126, 2.735456242372183592e+128, 2.706317176690077847e+130,
+      2.877679916342060385e+132, 3.292412878268106390e+134, 4.057840961953725969e+136, 5.393783049105737324e+138,
+      7.741523901672235406e+140, 1.201209962310668456e+143, 2.017456079556807301e+145, 3.672176623483062526e+147,
+      7.253163798058577630e+149, 1.556591535302570570e+152, 3.634399832790394885e+154, };
+
+__constant__ double* m_weights_double[8] = {
+    m_weights_double_1,
+    m_weights_double_2,
+    m_weights_double_3,
+    m_weights_double_4,
+    m_weights_double_5,
+    m_weights_double_6,
+    m_weights_double_7,
+    m_weights_double_8
+};
+__constant__ boost::math::size_t float_coefficients_size[8] = {4, 4, 8, 16, 32, 65, 129, 259};
+
+__constant__ boost::math::size_t double_coefficients_size[8] = {6, 6, 12, 24, 49, 98, 196, 391};
+
+template <typename T>
+struct coefficients_selector;
+
+template <>
+struct coefficients_selector<float>
+{
+    __device__ static const auto abscissas() { return m_abscissas_float; }
+    __device__ static const auto weights() { return m_weights_float; }
+    __device__ static const auto size() { return float_coefficients_size; }
+};
+
+template <>
+struct coefficients_selector<double>
+{
+    __device__ static const auto abscissas() { return m_abscissas_double; }
+    __device__ static const auto weights() { return m_weights_double; }
+    __device__ static const auto size() { return double_coefficients_size; }
+};
+
+template <class F, class Real, class Policy = policies::policy<> >
+__device__ auto sinh_sinh_integrate_impl(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
+{
+    BOOST_MATH_STD_USING
+    using boost::math::constants::half;
+    using boost::math::constants::half_pi;
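// [Illustrative note, not part of the patch] The public device entry point for
// this routine is boost::math::quadrature::sinh_sinh_integrate, added in
// sinh_sinh.hpp further down in this patch; it forwards here. A minimal
// kernel-side usage sketch, assuming a double-precision functor (the kernel
// name, tolerance, and integrand are invented for the example):
//
//     __global__ void gaussian_kernel(double* out)
//     {
//         const auto f = [](double x) { return exp(-x * x); };
//         double error, L1;
//         boost::math::size_t levels;
//         // Converges to sqrt(pi) ~= 1.7724538509
//         *out = boost::math::quadrature::sinh_sinh_integrate(f, 1e-9, &error, &L1, &levels);
//     }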
+    using boost::math::size_t;
+
+    constexpr auto function = "boost::math::quadrature::sinh_sinh<%1%>::integrate";
+
+    using K = decltype(f(static_cast<Real>(0)));
+    static_assert(!boost::math::is_integral<K>::value,
+                  "The return type cannot be integral, it must be either a real or complex floating point type.");
+
+    K y_max = f(boost::math::tools::max_value<Real>());
+
+    if(abs(y_max) > boost::math::tools::epsilon<Real>())
+    {
+        return static_cast<K>(policies::raise_domain_error(function,
+            "The function you are trying to integrate does not go to zero at infinity, and instead evaluates to %1%", y_max, Policy()));
+    }
+
+    K y_min = f(-boost::math::tools::max_value<Real>());
+
+    if(abs(y_min) > boost::math::tools::epsilon<Real>())
+    {
+        return static_cast<K>(policies::raise_domain_error(function,
+            "The function you are trying to integrate does not go to zero at -infinity, and instead evaluates to %1%", y_min, Policy()));
+    }
+
+    // Get the party started with two estimates of the integral:
+    const auto m_abscissas = coefficients_selector<Real>::abscissas();
+    const auto m_weights = coefficients_selector<Real>::weights();
+    const auto m_size = coefficients_selector<Real>::size();
+
+    K I0 = f(0)*half_pi<Real>();
+    Real L1_I0 = abs(I0);
+    for(size_t i = 0; i < m_size[0]; ++i)
+    {
+        Real x = m_abscissas[0][i];
+        K yp = f(x);
+        K ym = f(-x);
+        I0 += (yp + ym)*m_weights[0][i];
+        L1_I0 += (abs(yp)+abs(ym))*m_weights[0][i];
+    }
+
+    K I1 = I0;
+    Real L1_I1 = L1_I0;
+    for (size_t i = 0; i < m_size[1]; ++i)
+    {
+        Real x = m_abscissas[1][i];
+        K yp = f(x);
+        K ym = f(-x);
+        I1 += (yp + ym)*m_weights[1][i];
+        L1_I1 += (abs(yp) + abs(ym))*m_weights[1][i];
+    }
+
+    I1 *= half<Real>();
+    L1_I1 *= half<Real>();
+    Real err = abs(I0 - I1);
+
+    size_t i = 2;
+    // Rows 0-7 of the coefficient tables above are the only precomputed
+    // levels, so the refinement loop must stop after row 7:
+    for(; i < 8U; ++i)
+    {
+        I0 = I1;
+        L1_I0 = L1_I1;
+
+        I1 = half<Real>()*I0;
+        L1_I1 = half<Real>()*L1_I0;
+        Real h = static_cast<Real>(1) / static_cast<Real>(1 << i);
+        K sum = 0;
+        Real absum = 0;
+
+        Real abterm1 = 1;
+        Real eps = boost::math::tools::epsilon<Real>()*L1_I1;
+
+        auto abscissa_row = m_abscissas[i];
+        auto weight_row = m_weights[i];
+
+        for(size_t j = 0; j < m_size[i]; ++j)
+        {
+            Real x = abscissa_row[j];
+            K yp = f(x);
+            K ym = f(-x);
+            sum += (yp + ym)*weight_row[j];
+            Real abterm0 = (abs(yp) + abs(ym))*weight_row[j];
+            absum += abterm0;
+
+            // We require two consecutive terms to be < eps in case we hit a zero of f.
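// [Illustrative note, not part of the patch] Why two consecutive terms: the
// sinh-sinh abscissas grow double-exponentially, so once a term drops below
// eps the remaining tail is negligible -- unless the integrand merely crossed
// a zero at that particular node. Requiring both abterm0 and abterm1 to be
// below eps, and only once x > 100 (i.e. well into the tail), keeps an
// isolated zero of f from truncating the row too early.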
+            if (x > static_cast<Real>(100) && abterm0 < eps && abterm1 < eps)
+            {
+                break;
+            }
+            abterm1 = abterm0;
+        }
+
+        I1 += sum*h;
+        L1_I1 += absum*h;
+        err = abs(I0 - I1);
+
+        if (!(boost::math::isfinite)(L1_I1))
+        {
+            constexpr auto err_msg = "The sinh_sinh quadrature evaluated your function at a singular point, leading to the value %1%.\n"
+                                     "sinh_sinh quadrature cannot handle singularities in the domain.\n"
+                                     "If you are sure your function has no singularities, please submit a bug against boost.math\n";
+            return static_cast<K>(policies::raise_evaluation_error(function, err_msg, I1, Policy()));
+        }
+        if (err <= tolerance*L1_I1)
+        {
+            break;
+        }
+    }
+
+    if (error)
+    {
+        *error = err;
+    }
+
+    if (L1)
+    {
+        *L1 = L1_I1;
+    }
+
+    if (levels)
+    {
+        *levels = i;
+    }
+
+    return I1;
+}
+
+} // Namespace detail
+} // Namespace quadrature
+} // Namespace math
+} // Namespace boost
+
+#endif // BOOST_MATH_ENABLE_CUDA
+
+#endif // BOOST_MATH_QUADRATURE_DETAIL_SINH_SINH_DETAIL_HPP
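The exp_sinh.hpp hunk that follows adds a device-callable free-function interface. Its job is mostly limit normalisation: every half-infinite range is reduced to the canonical [0, inf) case, by shifting (t -> t + a) or reflecting (t -> b - t), before the detail implementation runs. A minimal usage sketch (kernel name, integrand, and tolerance are invented for the example; the exact value of this integral is sqrt(pi)/2):

    __global__ void exp_sinh_demo(double* out)
    {
        // Integrate exp(-t^2) over [0, inf) with the interface added below.
        const auto f = [](double t) { return exp(-t * t); };
        double error, L1;
        boost::math::size_t levels;
        *out = boost::math::quadrature::exp_sinh_integrate(
            f, 0.0, boost::math::tools::max_value<double>(),  // limits: [0, inf)
            1e-9, &error, &L1, &levels);                      // tolerance + diagnostics
    }

Passing a = 0 takes the fast path with no wrapping lambda; a finite non-zero limit costs one extra level of functor indirection.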
diff --git a/include/boost/math/quadrature/exp_sinh.hpp b/include/boost/math/quadrature/exp_sinh.hpp
index f28493737e..d3148e0c0a 100644
--- a/include/boost/math/quadrature/exp_sinh.hpp
+++ b/include/boost/math/quadrature/exp_sinh.hpp
@@ -15,11 +15,15 @@
 #ifndef BOOST_MATH_QUADRATURE_EXP_SINH_HPP
 #define BOOST_MATH_QUADRATURE_EXP_SINH_HPP

+#include
+#include
+
+#ifndef BOOST_MATH_HAS_NVRTC
+
 #include
 #include
 #include
 #include
-#include

 namespace boost{ namespace math{ namespace quadrature {

@@ -98,4 +102,79 @@ auto exp_sinh<Real, Policy>::integrate(const F& f, Real tolerance, Real* error,

 }}}

-#endif
+
+#endif // BOOST_MATH_HAS_NVRTC
+
+#ifdef BOOST_MATH_ENABLE_CUDA
+
+#include
+#include
+#include
+#include
+#include
+
+namespace boost {
+namespace math {
+namespace quadrature {
+
+template <class F, class Real, class Policy = policies::policy<> >
+__device__ auto exp_sinh_integrate(const F& f, Real a, Real b, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
+{
+    BOOST_MATH_STD_USING
+
+    using K = decltype(f(a));
+    static_assert(!boost::math::is_integral<K>::value,
+                  "The return type cannot be integral, it must be either a real or complex floating point type.");
+
+    constexpr auto function = "boost::math::quadrature::exp_sinh<%1%>::integrate";
+
+    // Neither limit may be a NaN:
+    if((boost::math::isnan)(a) || (boost::math::isnan)(b))
+    {
+        return static_cast<K>(policies::raise_domain_error(function, "NaN supplied as one limit of integration - sorry I don't know what to do", a, Policy()));
+    }
+    // Right limit is infinite:
+    if ((boost::math::isfinite)(a) && (b >= boost::math::tools::max_value<Real>()))
+    {
+        // If a = 0, don't use an additional level of indirection:
+        if (a == static_cast<Real>(0))
+        {
+            return detail::exp_sinh_integrate_impl(f, tolerance, error, L1, levels);
+        }
+        const auto u = [&](Real t)->K { return f(t + a); };
+        return detail::exp_sinh_integrate_impl(u, tolerance, error, L1, levels);
+    }
+
+    if ((boost::math::isfinite)(b) && a <= -boost::math::tools::max_value<Real>())
+    {
+        const auto u = [&](Real t)->K { return f(b-t);};
+        return detail::exp_sinh_integrate_impl(u, tolerance, error, L1, levels);
+    }
+
+    // Infinite limits:
+    if ((a <= -boost::math::tools::max_value<Real>()) && (b >= boost::math::tools::max_value<Real>()))
+    {
+        return static_cast<K>(policies::raise_domain_error(function, "Use sinh_sinh quadrature for integration over the whole real line; exp_sinh is for half infinite integrals.", a, Policy()));
+    }
+    // If we get to here then both ends must necessarily be finite:
+    return static_cast<K>(policies::raise_domain_error(function, "Use tanh_sinh quadrature for integration over finite domains; exp_sinh is for half infinite integrals.", a, Policy()));
+}
+
+template <class F, class Real, class Policy = policies::policy<> >
+__device__ auto exp_sinh_integrate(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
+{
+    BOOST_MATH_STD_USING
+    constexpr auto function = "boost::math::quadrature::exp_sinh<%1%>::integrate";
+    if (abs(tolerance) > 1) {
+        return policies::raise_domain_error(function, "The tolerance provided (%1%) is unusually large; did you confuse it with a domain bound?", tolerance, Policy());
+    }
+    return detail::exp_sinh_integrate_impl(f, tolerance, error, L1, levels);
+}
+
+} // namespace quadrature
+} // namespace math
+} // namespace boost
+
+#endif // BOOST_MATH_ENABLE_CUDA
+
+#endif // BOOST_MATH_QUADRATURE_EXP_SINH_HPP
diff --git a/include/boost/math/quadrature/sinh_sinh.hpp b/include/boost/math/quadrature/sinh_sinh.hpp
index ed958eb8d2..7aabcb4376 100644
--- a/include/boost/math/quadrature/sinh_sinh.hpp
+++ b/include/boost/math/quadrature/sinh_sinh.hpp
@@ -1,4 +1,5 @@
 // Copyright Nick Thompson, 2017
+// Copyright Matt Borland, 2024
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -15,10 +16,17 @@
 #ifndef BOOST_MATH_QUADRATURE_SINH_SINH_HPP
 #define BOOST_MATH_QUADRATURE_SINH_SINH_HPP

+#include
+#include
+#include
+#include
+#include
+
+#ifndef BOOST_MATH_HAS_NVRTC
+
 #include
 #include
 #include
-#include

 namespace boost{ namespace math{ namespace quadrature {

@@ -40,4 +48,25 @@ class sinh_sinh
 };

 }}}

-#endif
+
+#endif // BOOST_MATH_HAS_NVRTC
+
+#ifdef BOOST_MATH_ENABLE_CUDA
+
+namespace boost {
+namespace math {
+namespace quadrature {
+
+template <class F, class Real, class Policy = policies::policy<> >
+__device__ auto sinh_sinh_integrate(const F& f, Real tol = boost::math::tools::root_epsilon<Real>(), Real* error = nullptr, Real* L1 = nullptr, boost::math::size_t* levels = nullptr)
+{
+    return detail::sinh_sinh_integrate_impl(f, tol, error, L1, levels);
+}
+
+} // namespace quadrature
+} // namespace math
+} // namespace boost
+
+#endif // BOOST_MATH_ENABLE_CUDA
+
+#endif // BOOST_MATH_QUADRATURE_SINH_SINH_HPP
diff --git a/include/boost/math/special_functions/airy.hpp b/include/boost/math/special_functions/airy.hpp
index 06eee92383..65114089a6 100644
--- a/include/boost/math/special_functions/airy.hpp
+++ b/include/boost/math/special_functions/airy.hpp
@@ -1,4 +1,5 @@
 // Copyright John Maddock 2012.
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt @@ -7,19 +8,24 @@ #ifndef BOOST_MATH_AIRY_HPP #define BOOST_MATH_AIRY_HPP -#include +#include +#include +#include +#include #include #include #include #include #include +#include +#include namespace boost{ namespace math{ namespace detail{ template -T airy_ai_imp(T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED T airy_ai_imp(T x, const Policy& pol) { BOOST_MATH_STD_USING @@ -57,7 +63,7 @@ T airy_ai_imp(T x, const Policy& pol) } template -T airy_bi_imp(T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED T airy_bi_imp(T x, const Policy& pol) { BOOST_MATH_STD_USING @@ -90,7 +96,7 @@ T airy_bi_imp(T x, const Policy& pol) } template -T airy_ai_prime_imp(T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED T airy_ai_prime_imp(T x, const Policy& pol) { BOOST_MATH_STD_USING @@ -125,7 +131,7 @@ T airy_ai_prime_imp(T x, const Policy& pol) } template -T airy_bi_prime_imp(T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED T airy_bi_prime_imp(T x, const Policy& pol) { BOOST_MATH_STD_USING @@ -156,7 +162,7 @@ T airy_bi_prime_imp(T x, const Policy& pol) } template -T airy_ai_zero_imp(int m, const Policy& pol) +BOOST_MATH_GPU_ENABLED T airy_ai_zero_imp(int m, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names, needed for log, sqrt. @@ -209,7 +215,7 @@ T airy_ai_zero_imp(int m, const Policy& pol) } template -T airy_bi_zero_imp(int m, const Policy& pol) +BOOST_MATH_GPU_ENABLED T airy_bi_zero_imp(int m, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names, needed for log, sqrt. @@ -263,7 +269,7 @@ T airy_bi_zero_imp(int m, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type airy_ai(T x, const Policy&) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type airy_ai(T x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD typedef typename tools::promote_args::type result_type; @@ -279,13 +285,13 @@ inline typename tools::promote_args::type airy_ai(T x, const Policy&) } template -inline typename tools::promote_args::type airy_ai(T x) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type airy_ai(T x) { return airy_ai(x, policies::policy<>()); } template -inline typename tools::promote_args::type airy_bi(T x, const Policy&) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type airy_bi(T x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD typedef typename tools::promote_args::type result_type; @@ -301,13 +307,13 @@ inline typename tools::promote_args::type airy_bi(T x, const Policy&) } template -inline typename tools::promote_args::type airy_bi(T x) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type airy_bi(T x) { return airy_bi(x, policies::policy<>()); } template -inline typename tools::promote_args::type airy_ai_prime(T x, const Policy&) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type airy_ai_prime(T x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD typedef typename tools::promote_args::type result_type; @@ -323,13 +329,13 @@ inline typename tools::promote_args::type airy_ai_prime(T x, const Policy&) } template -inline typename tools::promote_args::type airy_ai_prime(T x) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type airy_ai_prime(T x) { return airy_ai_prime(x, policies::policy<>()); } template -inline typename tools::promote_args::type airy_bi_prime(T x, const Policy&) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type airy_bi_prime(T x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD typedef typename tools::promote_args::type result_type; @@ 
-345,13 +351,13 @@ inline typename tools::promote_args::type airy_bi_prime(T x, const Policy&) } template -inline typename tools::promote_args::type airy_bi_prime(T x) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type airy_bi_prime(T x) { return airy_bi_prime(x, policies::policy<>()); } template -inline T airy_ai_zero(int m, const Policy& /*pol*/) +BOOST_MATH_GPU_ENABLED inline T airy_ai_zero(int m, const Policy& /*pol*/) { BOOST_FPU_EXCEPTION_GUARD typedef typename policies::evaluation::type value_type; @@ -371,13 +377,13 @@ inline T airy_ai_zero(int m, const Policy& /*pol*/) } template -inline T airy_ai_zero(int m) +BOOST_MATH_GPU_ENABLED inline T airy_ai_zero(int m) { return airy_ai_zero(m, policies::policy<>()); } template -inline OutputIterator airy_ai_zero( +BOOST_MATH_GPU_ENABLED inline OutputIterator airy_ai_zero( int start_index, unsigned number_of_zeros, OutputIterator out_it, @@ -399,7 +405,7 @@ inline OutputIterator airy_ai_zero( } template -inline OutputIterator airy_ai_zero( +BOOST_MATH_GPU_ENABLED inline OutputIterator airy_ai_zero( int start_index, unsigned number_of_zeros, OutputIterator out_it) @@ -408,7 +414,7 @@ inline OutputIterator airy_ai_zero( } template -inline T airy_bi_zero(int m, const Policy& /*pol*/) +BOOST_MATH_GPU_ENABLED inline T airy_bi_zero(int m, const Policy& /*pol*/) { BOOST_FPU_EXCEPTION_GUARD typedef typename policies::evaluation::type value_type; @@ -428,13 +434,13 @@ inline T airy_bi_zero(int m, const Policy& /*pol*/) } template -inline T airy_bi_zero(int m) +BOOST_MATH_GPU_ENABLED inline T airy_bi_zero(int m) { return airy_bi_zero(m, policies::policy<>()); } template -inline OutputIterator airy_bi_zero( +BOOST_MATH_GPU_ENABLED inline OutputIterator airy_bi_zero( int start_index, unsigned number_of_zeros, OutputIterator out_it, @@ -456,7 +462,7 @@ inline OutputIterator airy_bi_zero( } template -inline OutputIterator airy_bi_zero( +BOOST_MATH_GPU_ENABLED inline OutputIterator airy_bi_zero( int start_index, unsigned number_of_zeros, OutputIterator out_it) diff --git a/include/boost/math/special_functions/atanh.hpp b/include/boost/math/special_functions/atanh.hpp index 543fb5fce3..9d73e568c0 100644 --- a/include/boost/math/special_functions/atanh.hpp +++ b/include/boost/math/special_functions/atanh.hpp @@ -15,7 +15,7 @@ #pragma once #endif -#include +#include #include #include #include @@ -33,10 +33,10 @@ namespace boost // This is the main fare template - inline T atanh_imp(const T x, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline T atanh_imp(const T x, const Policy& pol) { BOOST_MATH_STD_USING - static const char* function = "boost::math::atanh<%1%>(%1%)"; + constexpr auto function = "boost::math::atanh<%1%>(%1%)"; if(x < -1) { @@ -87,7 +87,7 @@ namespace boost } template - inline typename tools::promote_args::type atanh(T x, const Policy&) + BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type atanh(T x, const Policy&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -102,7 +102,7 @@ namespace boost "boost::math::atanh<%1%>(%1%)"); } template - inline typename tools::promote_args::type atanh(T x) + BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type atanh(T x) { return boost::math::atanh(x, policies::policy<>()); } diff --git a/include/boost/math/special_functions/bessel.hpp b/include/boost/math/special_functions/bessel.hpp index e9677d3c79..c32f251bcd 100644 --- a/include/boost/math/special_functions/bessel.hpp +++ 
b/include/boost/math/special_functions/bessel.hpp
@@ -15,8 +15,14 @@
 # pragma once
 #endif

-#include
-#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
 #include
 #include
 #include
@@ -31,10 +37,8 @@
 #include
 #include
 #include
-#include
-#include
-#include
-#include
+#include
+#include

 #ifdef _MSC_VER
 # pragma warning(push)
@@ -50,7 +54,7 @@ struct sph_bessel_j_small_z_series_term
 {
    typedef T result_type;

-   sph_bessel_j_small_z_series_term(unsigned v_, T x)
+   BOOST_MATH_GPU_ENABLED sph_bessel_j_small_z_series_term(unsigned v_, T x)
       : N(0), v(v_)
    {
       BOOST_MATH_STD_USING
@@ -64,7 +68,7 @@ struct sph_bessel_j_small_z_series_term
       term = pow(mult, T(v)) / boost::math::tgamma(v+1+T(0.5f), Policy());
       mult *= -mult;
    }
-   T operator()()
+   BOOST_MATH_GPU_ENABLED T operator()()
    {
       T r = term;
       ++N;
@@ -79,11 +83,11 @@
 };

 template
-inline T sph_bessel_j_small_z_series(unsigned v, T x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T sph_bessel_j_small_z_series(unsigned v, T x, const Policy& pol)
 {
    BOOST_MATH_STD_USING // ADL of std names
    sph_bessel_j_small_z_series_term s(v, x);
-   std::uintmax_t max_iter = policies::get_max_series_iterations();
+   boost::math::uintmax_t max_iter = policies::get_max_series_iterations();

    T result = boost::math::tools::sum_series(s, boost::math::policies::get_epsilon(), max_iter);

@@ -92,10 +96,21 @@ inline T sph_bessel_j_small_z_series(unsigned v, T x, const Policy& pol)
 }

 template
-T cyl_bessel_j_imp(T v, T x, const bessel_no_int_tag& t, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T cyl_bessel_j_imp_final(T v, T x, const bessel_no_int_tag& t, const Policy& pol)
 {
    BOOST_MATH_STD_USING
-   static const char* function = "boost::math::bessel_j<%1%>(%1%,%1%)";
+
+   T result_J, y; // LCOV_EXCL_LINE
+   bessel_jy(v, x, &result_J, &y, need_j, pol);
+   return result_J;
+}
+
+// Dispatch function to avoid recursion
+template
+BOOST_MATH_GPU_ENABLED T cyl_bessel_j_imp(T v, T x, const bessel_no_int_tag& t, const Policy& pol)
+{
+   BOOST_MATH_STD_USING
+
    if(x < 0)
    {
       // better have integer v:
@@ -105,23 +120,27 @@ T cyl_bessel_j_imp(T v, T x, const bessel_no_int_tag& t, const Policy& pol)
          // This branch is hit by multiprecision types only, and is
          // tested by our real_concept tests, but these are excluded from coverage
          // due to time constraints.
-         T r = cyl_bessel_j_imp(v, T(-x), t, pol);
+         T r = cyl_bessel_j_imp_final(T(v), T(-x), t, pol);
          if (iround(v, pol) & 1)
+         {
            r = -r;
+         }
+
         return r;
         // LCOV_EXCL_STOP
      }
      else
+      {
+         constexpr auto function = "boost::math::bessel_j<%1%>(%1%,%1%)";
         return policies::raise_domain_error(function, "Got x = %1%, but we need x >= 0", x, pol);
+      }
   }

-   T result_J, y; // LCOV_EXCL_LINE
-   bessel_jy(v, x, &result_J, &y, need_j, pol);
-   return result_J;
+   return cyl_bessel_j_imp_final(T(v), T(x), t, pol);
 }

 template
-inline T cyl_bessel_j_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T cyl_bessel_j_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& pol)
 {
    BOOST_MATH_STD_USING // ADL of std names.
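// Why the patch splits cyl_bessel_j_imp in two: device compilers (nvcc,
// SYCL) reject or heavily penalise recursion in device code, and the old
// negative-x branch re-entered cyl_bessel_j_imp itself. Moving the real
// work into a *_imp_final core that never dispatches keeps the call graph
// acyclic. A minimal self-contained sketch of the same pattern; eval and
// eval_core are illustrative names, not Boost.Math API, and the core body
// is a stand-in for the real bessel_jy kernel:

#include <cmath>

template <class T>
T eval_core(T x)                     // does the real work; never calls eval()
{
   return x * std::exp(-x);          // stand-in for bessel_jy(...)
}

template <class T>
T eval(T x)                          // dispatch layer: argument reduction only
{
   return x < 0 ? -eval_core(T(-x))  // reflection handled here, no recursion
                : eval_core(x);
}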
int ival = detail::iconv(v, pol); @@ -135,14 +154,14 @@ inline T cyl_bessel_j_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& p } template -inline T cyl_bessel_j_imp(int v, T x, const bessel_int_tag&, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T cyl_bessel_j_imp(int v, T x, const bessel_int_tag&, const Policy& pol) { BOOST_MATH_STD_USING return bessel_jn(v, x, pol); } template -inline T sph_bessel_j_imp(unsigned n, T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T sph_bessel_j_imp(unsigned n, T x, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names if(x < 0) @@ -171,7 +190,7 @@ inline T sph_bessel_j_imp(unsigned n, T x, const Policy& pol) } template -T cyl_bessel_i_imp(T v, T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED T cyl_bessel_i_imp_final(T v, T x, const Policy& pol) { // // This handles all the bessel I functions, note that we don't optimise @@ -180,20 +199,7 @@ T cyl_bessel_i_imp(T v, T x, const Policy& pol) // case has better error handling too). // BOOST_MATH_STD_USING - static const char* function = "boost::math::cyl_bessel_i<%1%>(%1%,%1%)"; - if(x < 0) - { - // better have integer v: - if(floor(v) == v) - { - T r = cyl_bessel_i_imp(v, T(-x), pol); - if(iround(v, pol) & 1) - r = -r; - return r; - } - else - return policies::raise_domain_error(function, "Got x = %1%, but we need x >= 0", x, pol); - } + constexpr auto function = "boost::math::cyl_bessel_i<%1%>(%1%,%1%)"; if(x == 0) { if(v < 0) @@ -210,7 +216,7 @@ T cyl_bessel_i_imp(T v, T x, const Policy& pol) } return sqrt(2 / (x * constants::pi())) * sinh(x); } - if((policies::digits() <= 113) && (std::numeric_limits::digits <= 113) && (std::numeric_limits::radix == 2)) + if((policies::digits() <= 113) && (boost::math::numeric_limits::digits <= 113) && (boost::math::numeric_limits::radix == 2)) { if(v == 0) { @@ -228,10 +234,39 @@ T cyl_bessel_i_imp(T v, T x, const Policy& pol) return result_I; } +// Additional dispatch function to get the GPU impls happy +template +BOOST_MATH_GPU_ENABLED T cyl_bessel_i_imp(T v, T x, const Policy& pol) +{ + BOOST_MATH_STD_USING + constexpr auto function = "boost::math::cyl_bessel_i<%1%>(%1%,%1%)"; + + if(x < 0) + { + // better have integer v: + if(floor(v) == v) + { + T r = cyl_bessel_i_imp_final(T(v), T(-x), pol); + if(iround(v, pol) & 1) + { + r = -r; + } + + return r; + } + else + { + return policies::raise_domain_error(function, "Got x = %1%, but we need x >= 0", x, pol); + } + } + + return cyl_bessel_i_imp_final(T(v), T(x), pol); +} + template -inline T cyl_bessel_k_imp(T v, T x, const bessel_no_int_tag& /* t */, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T cyl_bessel_k_imp(T v, T x, const bessel_no_int_tag& /* t */, const Policy& pol) { - static const char* function = "boost::math::cyl_bessel_k<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::cyl_bessel_k<%1%>(%1%,%1%)"; BOOST_MATH_STD_USING if(x < 0) { @@ -248,7 +283,7 @@ inline T cyl_bessel_k_imp(T v, T x, const bessel_no_int_tag& /* t */, const Poli } template -inline T cyl_bessel_k_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T cyl_bessel_k_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& pol) { BOOST_MATH_STD_USING if((floor(v) == v)) @@ -259,15 +294,15 @@ inline T cyl_bessel_k_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& p } template -inline T cyl_bessel_k_imp(int v, T x, const bessel_int_tag&, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T cyl_bessel_k_imp(int v, T x, const bessel_int_tag&, const Policy& pol) { 
return bessel_kn(v, x, pol); } template -inline T cyl_neumann_imp(T v, T x, const bessel_no_int_tag&, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T cyl_neumann_imp(T v, T x, const bessel_no_int_tag&, const Policy& pol) { - static const char* function = "boost::math::cyl_neumann<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::cyl_neumann<%1%>(%1%,%1%)"; BOOST_MATH_INSTRUMENT_VARIABLE(v); BOOST_MATH_INSTRUMENT_VARIABLE(x); @@ -291,7 +326,7 @@ inline T cyl_neumann_imp(T v, T x, const bessel_no_int_tag&, const Policy& pol) } template -inline T cyl_neumann_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T cyl_neumann_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& pol) { BOOST_MATH_STD_USING @@ -310,16 +345,16 @@ inline T cyl_neumann_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& po } template -inline T cyl_neumann_imp(int v, T x, const bessel_int_tag&, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T cyl_neumann_imp(int v, T x, const bessel_int_tag&, const Policy& pol) { return bessel_yn(v, x, pol); } template -inline T sph_neumann_imp(unsigned v, T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T sph_neumann_imp(unsigned v, T x, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names - static const char* function = "boost::math::sph_neumann<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::sph_neumann<%1%>(%1%,%1%)"; // // Nothing much to do here but check for errors, and // evaluate the function's definition directly: @@ -340,11 +375,11 @@ inline T sph_neumann_imp(unsigned v, T x, const Policy& pol) } template -inline T cyl_bessel_j_zero_imp(T v, int m, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T cyl_bessel_j_zero_imp(T v, int m, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names, needed for floor. - static const char* function = "boost::math::cyl_bessel_j_zero<%1%>(%1%, int)"; + constexpr auto function = "boost::math::cyl_bessel_j_zero<%1%>(%1%, int)"; const T half_epsilon(boost::math::tools::epsilon() / 2U); @@ -395,7 +430,7 @@ inline T cyl_bessel_j_zero_imp(T v, int m, const Policy& pol) const T guess_root = boost::math::detail::bessel_zero::cyl_bessel_j_zero_detail::initial_guess((order_is_integer ? vv : v), m, pol); // Select the maximum allowed iterations from the policy. - std::uintmax_t number_of_iterations = policies::get_max_root_iterations(); + boost::math::uintmax_t number_of_iterations = policies::get_max_root_iterations(); const T delta_lo = ((guess_root > 0.2F) ? T(0.2) : T(guess_root / 2U)); @@ -418,11 +453,11 @@ inline T cyl_bessel_j_zero_imp(T v, int m, const Policy& pol) } template -inline T cyl_neumann_zero_imp(T v, int m, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T cyl_neumann_zero_imp(T v, int m, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names, needed for floor. - static const char* function = "boost::math::cyl_neumann_zero<%1%>(%1%, int)"; + constexpr auto function = "boost::math::cyl_neumann_zero<%1%>(%1%, int)"; // Handle non-finite order. if (!(boost::math::isfinite)(v) ) @@ -473,7 +508,7 @@ inline T cyl_neumann_zero_imp(T v, int m, const Policy& pol) const T guess_root = boost::math::detail::bessel_zero::cyl_neumann_zero_detail::initial_guess(v, m, pol); // Select the maximum allowed iterations from the policy. - std::uintmax_t number_of_iterations = policies::get_max_root_iterations(); + boost::math::uintmax_t number_of_iterations = policies::get_max_root_iterations(); const T delta_lo = ((guess_root > 0.2F) ? 
T(0.2) : T(guess_root / 2U)); @@ -498,7 +533,7 @@ inline T cyl_neumann_zero_imp(T v, int m, const Policy& pol) } // namespace detail template -inline typename detail::bessel_traits::result_type cyl_bessel_j(T1 v, T2 x, const Policy& /* pol */) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits::result_type cyl_bessel_j(T1 v, T2 x, const Policy& /* pol */) { BOOST_FPU_EXCEPTION_GUARD typedef typename detail::bessel_traits::result_type result_type; @@ -514,13 +549,13 @@ inline typename detail::bessel_traits::result_type cyl_bessel_j( } template -inline typename detail::bessel_traits >::result_type cyl_bessel_j(T1 v, T2 x) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits >::result_type cyl_bessel_j(T1 v, T2 x) { return cyl_bessel_j(v, x, policies::policy<>()); } template -inline typename detail::bessel_traits::result_type sph_bessel(unsigned v, T x, const Policy& /* pol */) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits::result_type sph_bessel(unsigned v, T x, const Policy& /* pol */) { BOOST_FPU_EXCEPTION_GUARD typedef typename detail::bessel_traits::result_type result_type; @@ -535,13 +570,13 @@ inline typename detail::bessel_traits::result_type sph_bessel(unsi } template -inline typename detail::bessel_traits >::result_type sph_bessel(unsigned v, T x) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits >::result_type sph_bessel(unsigned v, T x) { return sph_bessel(v, x, policies::policy<>()); } template -inline typename detail::bessel_traits::result_type cyl_bessel_i(T1 v, T2 x, const Policy& /* pol */) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits::result_type cyl_bessel_i(T1 v, T2 x, const Policy& /* pol */) { BOOST_FPU_EXCEPTION_GUARD typedef typename detail::bessel_traits::result_type result_type; @@ -556,13 +591,13 @@ inline typename detail::bessel_traits::result_type cyl_bessel_i( } template -inline typename detail::bessel_traits >::result_type cyl_bessel_i(T1 v, T2 x) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits >::result_type cyl_bessel_i(T1 v, T2 x) { return cyl_bessel_i(v, x, policies::policy<>()); } template -inline typename detail::bessel_traits::result_type cyl_bessel_k(T1 v, T2 x, const Policy& /* pol */) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits::result_type cyl_bessel_k(T1 v, T2 x, const Policy& /* pol */) { BOOST_FPU_EXCEPTION_GUARD typedef typename detail::bessel_traits::result_type result_type; @@ -578,13 +613,13 @@ inline typename detail::bessel_traits::result_type cyl_bessel_k( } template -inline typename detail::bessel_traits >::result_type cyl_bessel_k(T1 v, T2 x) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits >::result_type cyl_bessel_k(T1 v, T2 x) { return cyl_bessel_k(v, x, policies::policy<>()); } template -inline typename detail::bessel_traits::result_type cyl_neumann(T1 v, T2 x, const Policy& /* pol */) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits::result_type cyl_neumann(T1 v, T2 x, const Policy& /* pol */) { BOOST_FPU_EXCEPTION_GUARD typedef typename detail::bessel_traits::result_type result_type; @@ -600,13 +635,13 @@ inline typename detail::bessel_traits::result_type cyl_neumann(T } template -inline typename detail::bessel_traits >::result_type cyl_neumann(T1 v, T2 x) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits >::result_type cyl_neumann(T1 v, T2 x) { return cyl_neumann(v, x, policies::policy<>()); } template -inline typename detail::bessel_traits::result_type sph_neumann(unsigned v, T x, const Policy& /* 
pol */) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits::result_type sph_neumann(unsigned v, T x, const Policy& /* pol */) { BOOST_FPU_EXCEPTION_GUARD typedef typename detail::bessel_traits::result_type result_type; @@ -621,13 +656,13 @@ inline typename detail::bessel_traits::result_type sph_neumann(uns } template -inline typename detail::bessel_traits >::result_type sph_neumann(unsigned v, T x) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits >::result_type sph_neumann(unsigned v, T x) { return sph_neumann(v, x, policies::policy<>()); } template -inline typename detail::bessel_traits::result_type cyl_bessel_j_zero(T v, int m, const Policy& /* pol */) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits::result_type cyl_bessel_j_zero(T v, int m, const Policy& /* pol */) { BOOST_FPU_EXCEPTION_GUARD typedef typename detail::bessel_traits::result_type result_type; @@ -639,35 +674,35 @@ inline typename detail::bessel_traits::result_type cyl_bessel_j_ze policies::discrete_quantile<>, policies::assert_undefined<> >::type forwarding_policy; - static_assert( false == std::numeric_limits::is_specialized - || ( true == std::numeric_limits::is_specialized - && false == std::numeric_limits::is_integer), + static_assert( false == boost::math::numeric_limits::is_specialized + || ( true == boost::math::numeric_limits::is_specialized + && false == boost::math::numeric_limits::is_integer), "Order must be a floating-point type."); return policies::checked_narrowing_cast(detail::cyl_bessel_j_zero_imp(v, m, forwarding_policy()), "boost::math::cyl_bessel_j_zero<%1%>(%1%,%1%)"); } template -inline typename detail::bessel_traits >::result_type cyl_bessel_j_zero(T v, int m) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits >::result_type cyl_bessel_j_zero(T v, int m) { - static_assert( false == std::numeric_limits::is_specialized - || ( true == std::numeric_limits::is_specialized - && false == std::numeric_limits::is_integer), + static_assert( false == boost::math::numeric_limits::is_specialized + || ( true == boost::math::numeric_limits::is_specialized + && false == boost::math::numeric_limits::is_integer), "Order must be a floating-point type."); return cyl_bessel_j_zero >(v, m, policies::policy<>()); } template -inline OutputIterator cyl_bessel_j_zero(T v, +BOOST_MATH_GPU_ENABLED inline OutputIterator cyl_bessel_j_zero(T v, int start_index, unsigned number_of_zeros, OutputIterator out_it, const Policy& pol) { - static_assert( false == std::numeric_limits::is_specialized - || ( true == std::numeric_limits::is_specialized - && false == std::numeric_limits::is_integer), + static_assert( false == boost::math::numeric_limits::is_specialized + || ( true == boost::math::numeric_limits::is_specialized + && false == boost::math::numeric_limits::is_integer), "Order must be a floating-point type."); for(int i = 0; i < static_cast(number_of_zeros); ++i) @@ -679,7 +714,7 @@ inline OutputIterator cyl_bessel_j_zero(T v, } template -inline OutputIterator cyl_bessel_j_zero(T v, +BOOST_MATH_GPU_ENABLED inline OutputIterator cyl_bessel_j_zero(T v, int start_index, unsigned number_of_zeros, OutputIterator out_it) @@ -688,7 +723,7 @@ inline OutputIterator cyl_bessel_j_zero(T v, } template -inline typename detail::bessel_traits::result_type cyl_neumann_zero(T v, int m, const Policy& /* pol */) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits::result_type cyl_neumann_zero(T v, int m, const Policy& /* pol */) { BOOST_FPU_EXCEPTION_GUARD typedef typename 
detail::bessel_traits::result_type result_type; @@ -700,35 +735,35 @@ inline typename detail::bessel_traits::result_type cyl_neumann_zer policies::discrete_quantile<>, policies::assert_undefined<> >::type forwarding_policy; - static_assert( false == std::numeric_limits::is_specialized - || ( true == std::numeric_limits::is_specialized - && false == std::numeric_limits::is_integer), + static_assert( false == boost::math::numeric_limits::is_specialized + || ( true == boost::math::numeric_limits::is_specialized + && false == boost::math::numeric_limits::is_integer), "Order must be a floating-point type."); return policies::checked_narrowing_cast(detail::cyl_neumann_zero_imp(v, m, forwarding_policy()), "boost::math::cyl_neumann_zero<%1%>(%1%,%1%)"); } template -inline typename detail::bessel_traits >::result_type cyl_neumann_zero(T v, int m) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits >::result_type cyl_neumann_zero(T v, int m) { - static_assert( false == std::numeric_limits::is_specialized - || ( true == std::numeric_limits::is_specialized - && false == std::numeric_limits::is_integer), + static_assert( false == boost::math::numeric_limits::is_specialized + || ( true == boost::math::numeric_limits::is_specialized + && false == boost::math::numeric_limits::is_integer), "Order must be a floating-point type."); return cyl_neumann_zero >(v, m, policies::policy<>()); } template -inline OutputIterator cyl_neumann_zero(T v, +BOOST_MATH_GPU_ENABLED inline OutputIterator cyl_neumann_zero(T v, int start_index, unsigned number_of_zeros, OutputIterator out_it, const Policy& pol) { - static_assert( false == std::numeric_limits::is_specialized - || ( true == std::numeric_limits::is_specialized - && false == std::numeric_limits::is_integer), + static_assert( false == boost::math::numeric_limits::is_specialized + || ( true == boost::math::numeric_limits::is_specialized + && false == boost::math::numeric_limits::is_integer), "Order must be a floating-point type."); for(int i = 0; i < static_cast(number_of_zeros); ++i) @@ -740,7 +775,7 @@ inline OutputIterator cyl_neumann_zero(T v, } template -inline OutputIterator cyl_neumann_zero(T v, +BOOST_MATH_GPU_ENABLED inline OutputIterator cyl_neumann_zero(T v, int start_index, unsigned number_of_zeros, OutputIterator out_it) diff --git a/include/boost/math/special_functions/beta.hpp b/include/boost/math/special_functions/beta.hpp index c36e1f0d0c..27901a1131 100644 --- a/include/boost/math/special_functions/beta.hpp +++ b/include/boost/math/special_functions/beta.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,18 +11,27 @@ #pragma once #endif -#include #include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include #include #include #include #include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include namespace boost{ namespace math{ @@ -31,7 +41,7 @@ namespace detail{ // Implementation of Beta(a,b) using the Lanczos approximation: // template -T beta_imp(T a, T b, const Lanczos&, const Policy& pol) +BOOST_MATH_GPU_ENABLED T beta_imp(T a, T b, const Lanczos&, const Policy& pol) { BOOST_MATH_STD_USING // for ADL of std names @@ -85,7 +95,9 @@ T beta_imp(T a, T b, const Lanczos&, const Policy& pol) */ if(a < b) - std::swap(a, b); + { + BOOST_MATH_GPU_SAFE_SWAP(a, b); + } // Lanczos calculation: T agh = static_cast(a + Lanczos::g() - 0.5f); @@ -120,8 +132,9 @@ T beta_imp(T a, T b, const Lanczos&, const Policy& pol) // Generic implementation of Beta(a,b) without Lanczos approximation support // (Caution this is slow!!!): // +#ifndef BOOST_MATH_HAS_GPU_SUPPORT template -T beta_imp(T a, T b, const lanczos::undefined_lanczos& l, const Policy& pol) +BOOST_MATH_GPU_ENABLED T beta_imp(T a, T b, const lanczos::undefined_lanczos& l, const Policy& pol) { BOOST_MATH_STD_USING @@ -190,7 +203,7 @@ T beta_imp(T a, T b, const lanczos::undefined_lanczos& l, const Policy& pol) } } // template T beta_imp(T a, T b, const lanczos::undefined_lanczos& l) - +#endif // // Compute the leading power terms in the incomplete Beta: @@ -204,7 +217,7 @@ T beta_imp(T a, T b, const lanczos::undefined_lanczos& l, const Policy& pol) // horrendous cancellation errors. // template -T ibeta_power_terms(T a, +BOOST_MATH_GPU_ENABLED T ibeta_power_terms(T a, T b, T x, T y, @@ -242,11 +255,11 @@ T ibeta_power_terms(T a, // l1 and l2 are the base of the exponents minus one: T l1 = (x * b - y * agh) / agh; T l2 = (y * a - x * bgh) / bgh; - if(((std::min)(fabs(l1), fabs(l2)) < 0.2)) + if((BOOST_MATH_GPU_SAFE_MIN(fabs(l1), fabs(l2)) < 0.2)) { // when the base of the exponent is very near 1 we get really // gross errors unless extra care is taken: - if((l1 * l2 > 0) || ((std::min)(a, b) < 1)) + if((l1 * l2 > 0) || (BOOST_MATH_GPU_SAFE_MIN(a, b) < 1)) { // // This first branch handles the simple cases where either: @@ -282,7 +295,7 @@ T ibeta_power_terms(T a, BOOST_MATH_INSTRUMENT_VARIABLE(result); } } - else if((std::max)(fabs(l1), fabs(l2)) < 0.5) + else if(BOOST_MATH_GPU_SAFE_MAX(fabs(l1), fabs(l2)) < 0.5) { // // Both exponents are near one and both the exponents are @@ -444,8 +457,9 @@ T ibeta_power_terms(T a, // // This version is generic, slow, and does not use the Lanczos approximation. 
// +#ifndef BOOST_MATH_HAS_GPU_SUPPORT template -T ibeta_power_terms(T a, +BOOST_MATH_GPU_ENABLED T ibeta_power_terms(T a, T b, T x, T y, @@ -480,7 +494,7 @@ T ibeta_power_terms(T a, bool need_logs = false; if (a < b) { - BOOST_MATH_IF_CONSTEXPR(std::numeric_limits::has_infinity) + BOOST_MATH_IF_CONSTEXPR(boost::math::numeric_limits::has_infinity) { power1 = pow((x * y * c * c) / (a * b), a); power2 = pow((y * c) / b, b - a); @@ -503,7 +517,7 @@ T ibeta_power_terms(T a, } else { - BOOST_MATH_IF_CONSTEXPR(std::numeric_limits::has_infinity) + BOOST_MATH_IF_CONSTEXPR(boost::math::numeric_limits::has_infinity) { power1 = pow((x * y * c * c) / (a * b), b); power2 = pow((x * c) / a, a - b); @@ -522,7 +536,7 @@ T ibeta_power_terms(T a, need_logs = true; } } - BOOST_MATH_IF_CONSTEXPR(std::numeric_limits::has_infinity) + BOOST_MATH_IF_CONSTEXPR(boost::math::numeric_limits::has_infinity) { if (!(boost::math::isnormal)(power1) || !(boost::math::isnormal)(power2)) { @@ -554,7 +568,7 @@ T ibeta_power_terms(T a, // exp(a * log1p((xb - ya) / a + p + p(xb - ya) / a)) // // Analogously, when a > b we can just swap all the terms around. - // + // // Finally, there are a few cases (x or y is unity) when the above logic can't be used // or where there is no logarithmic cancellation and accuracy is better just using // the regular formula: @@ -621,6 +635,8 @@ T ibeta_power_terms(T a, } return prefix * power1 * (power2 / bet); } + +#endif // // Series approximation to the incomplete beta: // @@ -628,8 +644,8 @@ template struct ibeta_series_t { typedef T result_type; - ibeta_series_t(T a_, T b_, T x_, T mult) : result(mult), x(x_), apn(a_), poch(1-b_), n(1) {} - T operator()() + BOOST_MATH_GPU_ENABLED ibeta_series_t(T a_, T b_, T x_, T mult) : result(mult), x(x_), apn(a_), poch(1-b_), n(1) {} + BOOST_MATH_GPU_ENABLED T operator()() { T r = result / apn; apn += 1; @@ -644,7 +660,7 @@ struct ibeta_series_t }; template -T ibeta_series(T a, T b, T x, T s0, const Lanczos&, bool normalised, T* p_derivative, T y, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ibeta_series(T a, T b, T x, T s0, const Lanczos&, bool normalised, T* p_derivative, T y, const Policy& pol) { BOOST_MATH_STD_USING @@ -713,7 +729,7 @@ T ibeta_series(T a, T b, T x, T s0, const Lanczos&, bool normalised, T* p_deriva if(result < tools::min_value()) return s0; // Safeguard: series can't cope with denorms. ibeta_series_t s(a, b, x, result); - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_series_iterations(); result = boost::math::tools::sum_series(s, boost::math::policies::get_epsilon(), max_iter, s0); policies::check_series_iterations("boost::math::ibeta<%1%>(%1%, %1%, %1%) in ibeta_series (with lanczos)", max_iter, pol); return result; @@ -721,8 +737,9 @@ T ibeta_series(T a, T b, T x, T s0, const Lanczos&, bool normalised, T* p_deriva // // Incomplete Beta series again, this time without Lanczos support: // +#ifndef BOOST_MATH_HAS_GPU_SUPPORT template -T ibeta_series(T a, T b, T x, T s0, const boost::math::lanczos::undefined_lanczos& l, bool normalised, T* p_derivative, T y, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ibeta_series(T a, T b, T x, T s0, const boost::math::lanczos::undefined_lanczos& l, bool normalised, T* p_derivative, T y, const Policy& pol) { BOOST_MATH_STD_USING @@ -774,23 +791,23 @@ T ibeta_series(T a, T b, T x, T s0, const boost::math::lanczos::undefined_lanczo if(result < tools::min_value()) return s0; // Safeguard: series can't cope with denorms. 
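// Note that the GPU annotations leave the term-functor protocol itself
// unchanged: a result_type typedef plus an operator() that returns
// successive series terms, which tools::sum_series accumulates until a
// term becomes negligible or the iteration cap from the policy is hit.
// A self-contained sketch of that protocol follows; sum_series_sketch is
// a simplified stand-in, not the real boost::math::tools::sum_series:

#include <cmath>
#include <cstdint>

struct geometric_term                // same shape as ibeta_series_t above
{
   typedef double result_type;
   explicit geometric_term(double r_) : r(r_), term(1) {}
   double operator()() { double t = term; term *= r; return t; }
private:
   double r, term;
};

template <class F>
typename F::result_type sum_series_sketch(F& f, typename F::result_type eps,
                                          std::uintmax_t& max_iter)
{
   typename F::result_type result = 0, t;
   do {
      t = f();                       // pull the next term from the functor
      result += t;
   } while (std::fabs(t) > std::fabs(result) * eps && --max_iter);
   return result;
}

// Usage: geometric_term g(0.5); std::uintmax_t n = 1000;
// sum_series_sketch(g, 1e-16, n) converges to ~2.0 in about 50 terms.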
ibeta_series_t s(a, b, x, result); - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_series_iterations(); result = boost::math::tools::sum_series(s, boost::math::policies::get_epsilon(), max_iter, s0); policies::check_series_iterations("boost::math::ibeta<%1%>(%1%, %1%, %1%) in ibeta_series (without lanczos)", max_iter, pol); return result; } - +#endif // // Continued fraction for the incomplete beta: // template struct ibeta_fraction2_t { - typedef std::pair result_type; + typedef boost::math::pair result_type; - ibeta_fraction2_t(T a_, T b_, T x_, T y_) : a(a_), b(b_), x(x_), y(y_), m(0) {} + BOOST_MATH_GPU_ENABLED ibeta_fraction2_t(T a_, T b_, T x_, T y_) : a(a_), b(b_), x(x_), y(y_), m(0) {} - result_type operator()() + BOOST_MATH_GPU_ENABLED result_type operator()() { T aN = (a + m - 1) * (a + b + m - 1) * m * (b - m) * x * x; T denom = (a + 2 * m - 1); @@ -802,7 +819,7 @@ struct ibeta_fraction2_t ++m; - return std::make_pair(aN, bN); + return boost::math::make_pair(aN, bN); } private: @@ -813,7 +830,7 @@ struct ibeta_fraction2_t // Evaluate the incomplete beta via the continued fraction representation: // template -inline T ibeta_fraction2(T a, T b, T x, T y, const Policy& pol, bool normalised, T* p_derivative) +BOOST_MATH_GPU_ENABLED inline T ibeta_fraction2(T a, T b, T x, T y, const Policy& pol, bool normalised, T* p_derivative) { typedef typename lanczos::lanczos::type lanczos_type; BOOST_MATH_STD_USING @@ -836,7 +853,7 @@ inline T ibeta_fraction2(T a, T b, T x, T y, const Policy& pol, bool normalised, // Computes the difference between ibeta(a,b,x) and ibeta(a+k,b,x): // template -T ibeta_a_step(T a, T b, T x, T y, int k, const Policy& pol, bool normalised, T* p_derivative) +BOOST_MATH_GPU_ENABLED T ibeta_a_step(T a, T b, T x, T y, int k, const Policy& pol, bool normalised, T* p_derivative) { typedef typename lanczos::lanczos::type lanczos_type; @@ -863,6 +880,7 @@ T ibeta_a_step(T a, T b, T x, T y, int k, const Policy& pol, bool normalised, T* return prefix; } + // // This function is only needed for the non-regular incomplete beta, // it computes the delta in: @@ -870,7 +888,7 @@ T ibeta_a_step(T a, T b, T x, T y, int k, const Policy& pol, bool normalised, T* // it is currently only called for small k. // template -inline T rising_factorial_ratio(T a, T b, int k) +BOOST_MATH_GPU_ENABLED inline T rising_factorial_ratio(T a, T b, int k) { // calculate: // (a)(a+1)(a+2)...(a+k-1) @@ -901,33 +919,43 @@ struct Pn_size { // This is likely to be enough for ~35-50 digit accuracy // but it's hard to quantify exactly: + #ifndef BOOST_MATH_HAS_NVRTC static constexpr unsigned value = ::boost::math::max_factorial::value >= 100 ? 50 : ::boost::math::max_factorial::value >= ::boost::math::max_factorial::value ? 30 : ::boost::math::max_factorial::value >= ::boost::math::max_factorial::value ? 
15 : 1; static_assert(::boost::math::max_factorial::value >= ::boost::math::max_factorial::value, "Type does not provide for 35-50 digits of accuracy."); + #else + static constexpr unsigned value = 0; // Will never be called + #endif }; template <> struct Pn_size { static constexpr unsigned value = 15; // ~8-15 digit accuracy +#ifndef BOOST_MATH_HAS_GPU_SUPPORT static_assert(::boost::math::max_factorial::value >= 30, "Type does not provide for 8-15 digits of accuracy."); +#endif }; template <> struct Pn_size { static constexpr unsigned value = 30; // 16-20 digit accuracy +#ifndef BOOST_MATH_HAS_GPU_SUPPORT static_assert(::boost::math::max_factorial::value >= 60, "Type does not provide for 16-20 digits of accuracy."); +#endif }; template <> struct Pn_size { static constexpr unsigned value = 50; // ~35-50 digit accuracy +#ifndef BOOST_MATH_HAS_GPU_SUPPORT static_assert(::boost::math::max_factorial::value >= 100, "Type does not provide for ~35-50 digits of accuracy"); +#endif }; template -T beta_small_b_large_a_series(T a, T b, T x, T y, T s0, T mult, const Policy& pol, bool normalised) +BOOST_MATH_GPU_ENABLED T beta_small_b_large_a_series(T a, T b, T x, T y, T s0, T mult, const Policy& pol, bool normalised) { typedef typename lanczos::lanczos::type lanczos_type; BOOST_MATH_STD_USING @@ -1033,7 +1061,7 @@ T beta_small_b_large_a_series(T a, T b, T x, T y, T s0, T mult, const Policy& po // complement of the binomial distribution cdf and use this finite sum. // template -T binomial_ccdf(T n, T k, T x, T y, const Policy& pol) +BOOST_MATH_GPU_ENABLED T binomial_ccdf(T n, T k, T x, T y, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names @@ -1097,10 +1125,11 @@ T binomial_ccdf(T n, T k, T x, T y, const Policy& pol) // input range and select the right implementation method for // each domain: // + template -T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_derivative) +BOOST_MATH_GPU_ENABLED T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_derivative) { - static const char* function = "boost::math::ibeta<%1%>(%1%, %1%, %1%)"; + constexpr auto function = "boost::math::ibeta<%1%>(%1%, %1%, %1%)"; typedef typename lanczos::lanczos::type lanczos_type; BOOST_MATH_STD_USING // for ADL of std math functions. 
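// Two recurring substitutions in the hunks above and below deserve a note.
// First, `static const char* function` becomes `constexpr auto function`
// because a function-local static needs static storage duration, which
// NVRTC/SYCL device functions cannot provide, whereas a constexpr pointer
// to a string literal is a pure compile-time constant. Second, std::swap,
// std::min and std::max give way to BOOST_MATH_GPU_SAFE_* macros, since
// calling the std versions from device code is not reliably supported
// across CUDA, NVRTC and SYCL targets. A sketch of hypothetical
// equivalents (the real definitions live in Boost.Math's config headers
// and may differ):

#if defined(__CUDACC__)
#  define GPU_ENABLED_SKETCH __host__ __device__
#else
#  define GPU_ENABLED_SKETCH
#endif

template <class T>
GPU_ENABLED_SKETCH void gpu_safe_swap_sketch(T& a, T& b)
{
   T t(a); a = b; b = t;             // no <algorithm>/std::swap on device
}

template <class T>
GPU_ENABLED_SKETCH const char* error_context_sketch()
{
   constexpr auto function = "boost::math::ibeta<%1%>(%1%, %1%, %1%)";
   // static const char* function = "...";  // would need static storage
   return function;
}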
@@ -1184,8 +1213,8 @@ T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_de } if(a == 1) { - std::swap(a, b); - std::swap(x, y); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(x, y); invert = !invert; } if(b == 1) @@ -1214,19 +1243,19 @@ T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_de return p; } - if((std::min)(a, b) <= 1) + if(BOOST_MATH_GPU_SAFE_MIN(a, b) <= 1) { if(x > 0.5) { - std::swap(a, b); - std::swap(x, y); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(x, y); invert = !invert; BOOST_MATH_INSTRUMENT_VARIABLE(invert); } - if((std::max)(a, b) <= 1) + if(BOOST_MATH_GPU_SAFE_MAX(a, b) <= 1) { // Both a,b < 1: - if((a >= (std::min)(T(0.2), b)) || (pow(x, a) <= 0.9)) + if((a >= BOOST_MATH_GPU_SAFE_MIN(T(0.2), b)) || (pow(x, a) <= 0.9)) { if(!invert) { @@ -1243,8 +1272,8 @@ T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_de } else { - std::swap(a, b); - std::swap(x, y); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(x, y); invert = !invert; if(y >= 0.3) { @@ -1309,8 +1338,8 @@ T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_de } else { - std::swap(a, b); - std::swap(x, y); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(x, y); invert = !invert; if(y >= 0.3) @@ -1387,15 +1416,15 @@ T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_de } if(lambda < 0) { - std::swap(a, b); - std::swap(x, y); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(x, y); invert = !invert; BOOST_MATH_INSTRUMENT_VARIABLE(invert); } if(b < 40) { - if((floor(a) == a) && (floor(b) == b) && (a < static_cast((std::numeric_limits::max)() - 100)) && (y != 1)) + if((floor(a) == a) && (floor(b) == b) && (a < static_cast((boost::math::numeric_limits::max)() - 100)) && (y != 1)) { // relate to the binomial distribution and use a finite sum: T k = a - 1; @@ -1502,15 +1531,15 @@ T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_de } // template T ibeta_imp(T a, T b, T x, const Lanczos& l, bool inv, bool normalised) template -inline T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised) +BOOST_MATH_GPU_ENABLED inline T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised) { return ibeta_imp(a, b, x, pol, inv, normalised, static_cast(nullptr)); } template -T ibeta_derivative_imp(T a, T b, T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ibeta_derivative_imp(T a, T b, T x, const Policy& pol) { - static const char* function = "ibeta_derivative<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "ibeta_derivative<%1%>(%1%,%1%,%1%)"; // // start with the usual error checks: // @@ -1559,8 +1588,8 @@ T ibeta_derivative_imp(T a, T b, T x, const Policy& pol) // Some forwarding functions that disambiguate the third argument type: // template -inline typename tools::promote_args::type - beta(RT1 a, RT2 b, const Policy&, const std::true_type*) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type + beta(RT1 a, RT2 b, const Policy&, const boost::math::true_type*) { BOOST_FPU_EXCEPTION_GUARD typedef typename tools::promote_args::type result_type; @@ -1576,8 +1605,8 @@ inline typename tools::promote_args::type return policies::checked_narrowing_cast(detail::beta_imp(static_cast(a), static_cast(b), evaluation_type(), forwarding_policy()), "boost::math::beta<%1%>(%1%,%1%)"); } template -inline typename tools::promote_args::type - beta(RT1 a, RT2 b, RT3 x, const 
std::false_type*) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type + beta(RT1 a, RT2 b, RT3 x, const boost::math::false_type*) { return boost::math::beta(a, b, x, policies::policy<>()); } @@ -1589,7 +1618,7 @@ inline typename tools::promote_args::type // and forward to the implementation functions: // template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type beta(RT1 a, RT2 b, A arg) { using tag = typename policies::is_policy::type; @@ -1598,14 +1627,14 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type beta(RT1 a, RT2 b) { return boost::math::beta(a, b, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type beta(RT1 a, RT2 b, RT3 x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD @@ -1622,7 +1651,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type betac(RT1 a, RT2 b, RT3 x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD @@ -1638,14 +1667,14 @@ inline typename tools::promote_args::type return policies::checked_narrowing_cast(detail::ibeta_imp(static_cast(a), static_cast(b), static_cast(x), forwarding_policy(), true, false), "boost::math::betac<%1%>(%1%,%1%,%1%)"); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type betac(RT1 a, RT2 b, RT3 x) { return boost::math::betac(a, b, x, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibeta(RT1 a, RT2 b, RT3 x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD @@ -1661,14 +1690,14 @@ inline typename tools::promote_args::type return policies::checked_narrowing_cast(detail::ibeta_imp(static_cast(a), static_cast(b), static_cast(x), forwarding_policy(), false, true), "boost::math::ibeta<%1%>(%1%,%1%,%1%)"); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibeta(RT1 a, RT2 b, RT3 x) { return boost::math::ibeta(a, b, x, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibetac(RT1 a, RT2 b, RT3 x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD @@ -1684,14 +1713,14 @@ inline typename tools::promote_args::type return policies::checked_narrowing_cast(detail::ibeta_imp(static_cast(a), static_cast(b), static_cast(x), forwarding_policy(), true, true), "boost::math::ibetac<%1%>(%1%,%1%,%1%)"); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibetac(RT1 a, RT2 b, RT3 x) { return boost::math::ibetac(a, b, x, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibeta_derivative(RT1 a, RT2 b, RT3 x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD @@ -1707,7 +1736,7 @@ inline typename tools::promote_args::type return policies::checked_narrowing_cast(detail::ibeta_derivative_imp(static_cast(a), static_cast(b), static_cast(x), forwarding_policy()), "boost::math::ibeta_derivative<%1%>(%1%,%1%,%1%)"); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type 
ibeta_derivative(RT1 a, RT2 b, RT3 x) { return boost::math::ibeta_derivative(a, b, x, policies::policy<>()); diff --git a/include/boost/math/special_functions/binomial.hpp b/include/boost/math/special_functions/binomial.hpp index e776a90bb8..3c49ff30d5 100644 --- a/include/boost/math/special_functions/binomial.hpp +++ b/include/boost/math/special_functions/binomial.hpp @@ -10,20 +10,21 @@ #pragma once #endif +#include +#include #include #include #include #include -#include namespace boost{ namespace math{ template -T binomial_coefficient(unsigned n, unsigned k, const Policy& pol) +BOOST_MATH_GPU_ENABLED T binomial_coefficient(unsigned n, unsigned k, const Policy& pol) { - static_assert(!std::is_integral::value, "Type T must not be an integral type"); + static_assert(!boost::math::is_integral::value, "Type T must not be an integral type"); BOOST_MATH_STD_USING - static const char* function = "boost::math::binomial_coefficient<%1%>(unsigned, unsigned)"; + constexpr auto function = "boost::math::binomial_coefficient<%1%>(unsigned, unsigned)"; if(k > n) return policies::raise_domain_error(function, "The binomial coefficient is undefined for k > n, but got k = %1%.", static_cast(k), pol); T result; // LCOV_EXCL_LINE @@ -43,9 +44,9 @@ T binomial_coefficient(unsigned n, unsigned k, const Policy& pol) { // Use the beta function: if(k < n - k) - result = static_cast(k * beta(static_cast(k), static_cast(n-k+1), pol)); + result = static_cast(k * boost::math::beta(static_cast(k), static_cast(n-k+1), pol)); else - result = static_cast((n - k) * beta(static_cast(k+1), static_cast(n-k), pol)); + result = static_cast((n - k) * boost::math::beta(static_cast(k+1), static_cast(n-k), pol)); if(result == 0) return policies::raise_overflow_error(function, nullptr, pol); result = 1 / result; @@ -59,7 +60,7 @@ T binomial_coefficient(unsigned n, unsigned k, const Policy& pol) // we'll promote to double: // template <> -inline float binomial_coefficient >(unsigned n, unsigned k, const policies::policy<>&) +BOOST_MATH_GPU_ENABLED inline float binomial_coefficient >(unsigned n, unsigned k, const policies::policy<>&) { typedef policies::normalise< policies::policy<>, @@ -71,7 +72,7 @@ inline float binomial_coefficient >(unsigned n, unsign } template -inline T binomial_coefficient(unsigned n, unsigned k) +BOOST_MATH_GPU_ENABLED inline T binomial_coefficient(unsigned n, unsigned k) { return binomial_coefficient(n, k, policies::policy<>()); } diff --git a/include/boost/math/special_functions/cbrt.hpp b/include/boost/math/special_functions/cbrt.hpp index 77cd5f0aec..7fdf78d014 100644 --- a/include/boost/math/special_functions/cbrt.hpp +++ b/include/boost/math/special_functions/cbrt.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,12 +11,16 @@ #pragma once #endif +#include + +#ifndef BOOST_MATH_HAS_NVRTC + #include +#include +#include #include #include #include -#include -#include namespace boost{ namespace math{ @@ -38,7 +43,7 @@ struct largest_cbrt_int_type }; template -T cbrt_imp(T z, const Policy& pol) +BOOST_MATH_GPU_ENABLED T cbrt_imp(T z, const Policy& pol) { BOOST_MATH_STD_USING // @@ -51,7 +56,7 @@ T cbrt_imp(T z, const Policy& pol) // Expected Error Term: -1.231e-006 // Maximum Relative Change in Control Points: 5.982e-004 // - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { static_cast(0.37568269008611818), static_cast(1.3304968705558024), static_cast(-1.4897101632445036), @@ -59,7 +64,7 @@ T cbrt_imp(T z, const Policy& pol) static_cast(-0.6398703759826468), static_cast(0.13584489959258635), }; - static const T correction[] = { + BOOST_MATH_STATIC const T correction[] = { static_cast(0.62996052494743658238360530363911), // 2^-2/3 static_cast(0.79370052598409973737585281963615), // 2^-1/3 static_cast(1), @@ -154,7 +159,7 @@ T cbrt_imp(T z, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type cbrt(T z, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type cbrt(T z, const Policy& pol) { using result_type = typename tools::promote_args::type; using value_type = typename policies::evaluation::type; @@ -162,7 +167,7 @@ inline typename tools::promote_args::type cbrt(T z, const Policy& pol) } template -inline typename tools::promote_args::type cbrt(T z) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type cbrt(T z) { return cbrt(z, policies::policy<>()); } @@ -170,6 +175,39 @@ inline typename tools::promote_args::type cbrt(T z) } // namespace math } // namespace boost +#else // Special NVRTC handling + +namespace boost { +namespace math { + +template +BOOST_MATH_GPU_ENABLED double cbrt(T x) +{ + return ::cbrt(x); +} + +BOOST_MATH_GPU_ENABLED inline float cbrt(float x) +{ + return ::cbrtf(x); +} + +template +BOOST_MATH_GPU_ENABLED double cbrt(T x, const Policy&) +{ + return ::cbrt(x); +} + +template +BOOST_MATH_GPU_ENABLED float cbrt(float x, const Policy&) +{ + return ::cbrtf(x); +} + +} // namespace math +} // namespace boost + +#endif // NVRTC + #endif // BOOST_MATH_SF_CBRT_HPP diff --git a/include/boost/math/special_functions/cos_pi.hpp b/include/boost/math/special_functions/cos_pi.hpp index e09700ec5e..7c33614de7 100644 --- a/include/boost/math/special_functions/cos_pi.hpp +++ b/include/boost/math/special_functions/cos_pi.hpp @@ -1,4 +1,5 @@ // Copyright (c) 2007 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,10 +11,14 @@ #pragma once #endif +#include + +#ifndef BOOST_MATH_HAS_NVRTC + #include #include +#include #include -#include #include #include #include @@ -21,7 +26,7 @@ namespace boost{ namespace math{ namespace detail{ template -T cos_pi_imp(T x, const Policy&) +BOOST_MATH_GPU_ENABLED T cos_pi_imp(T x, const Policy&) { BOOST_MATH_STD_USING // ADL of std names // cos of pi*x: @@ -34,7 +39,7 @@ T cos_pi_imp(T x, const Policy&) x = -x; } T rem = floor(x); - if(abs(floor(rem/2)*2 - rem) > std::numeric_limits::epsilon()) + if(abs(floor(rem/2)*2 - rem) > boost::math::numeric_limits::epsilon()) { invert = !invert; } @@ -60,7 +65,7 @@ T cos_pi_imp(T x, const Policy&) } // namespace detail template -inline typename tools::promote_args::type cos_pi(T x, const Policy&) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type cos_pi(T x, const Policy&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -77,12 +82,47 @@ inline typename tools::promote_args::type cos_pi(T x, const Policy&) } template -inline typename tools::promote_args::type cos_pi(T x) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type cos_pi(T x) { return boost::math::cos_pi(x, policies::policy<>()); } } // namespace math } // namespace boost + +#else // Special handling for NVRTC + +namespace boost { +namespace math { + +template +BOOST_MATH_GPU_ENABLED auto cos_pi(T x) +{ + return ::cospi(x); +} + +template <> +BOOST_MATH_GPU_ENABLED auto cos_pi(float x) +{ + return ::cospif(x); +} + +template +BOOST_MATH_GPU_ENABLED auto cos_pi(T x, const Policy&) +{ + return ::cospi(x); +} + +template +BOOST_MATH_GPU_ENABLED auto cos_pi(float x, const Policy&) +{ + return ::cospif(x); +} + +} // namespace math +} // namespace boost + +#endif // BOOST_MATH_HAS_NVRTC + #endif diff --git a/include/boost/math/special_functions/detail/airy_ai_bi_zero.hpp b/include/boost/math/special_functions/detail/airy_ai_bi_zero.hpp index 7735eb8589..e518422f17 100644 --- a/include/boost/math/special_functions/detail/airy_ai_bi_zero.hpp +++ b/include/boost/math/special_functions/detail/airy_ai_bi_zero.hpp @@ -13,6 +13,8 @@ #ifndef BOOST_MATH_AIRY_AI_BI_ZERO_2013_01_20_HPP_ #define BOOST_MATH_AIRY_AI_BI_ZERO_2013_01_20_HPP_ + #include + #include #include #include @@ -21,18 +23,18 @@ { // Forward declarations of the needed Airy function implementations. 
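// The NVRTC fallback blocks above (cbrt, cos_pi) all share one shape:
// when the full header-only implementation cannot be compiled under
// NVRTC, forward to the CUDA device-math library instead, with a float
// overload so single precision is not promoted to double, and Policy
// overloads that simply ignore the policy argument. A condensed sketch
// of that shape; cos_pi_sketch is an illustrative name, while ::cospi
// and ::cospif are the CUDA math functions the real block delegates to:

template <typename T>
__device__ double cos_pi_sketch(T x) { return ::cospi(static_cast<double>(x)); }

__device__ inline float cos_pi_sketch(float x) { return ::cospif(x); }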
template - T airy_ai_imp(T x, const Policy& pol); + BOOST_MATH_GPU_ENABLED T airy_ai_imp(T x, const Policy& pol); template - T airy_bi_imp(T x, const Policy& pol); + BOOST_MATH_GPU_ENABLED T airy_bi_imp(T x, const Policy& pol); template - T airy_ai_prime_imp(T x, const Policy& pol); + BOOST_MATH_GPU_ENABLED T airy_ai_prime_imp(T x, const Policy& pol); template - T airy_bi_prime_imp(T x, const Policy& pol); + BOOST_MATH_GPU_ENABLED T airy_bi_prime_imp(T x, const Policy& pol); namespace airy_zero { template - T equation_as_10_4_105(const T& z, const Policy& pol) + BOOST_MATH_GPU_ENABLED T equation_as_10_4_105(const T& z, const Policy& pol) { const T one_over_z (T(1) / z); const T one_over_z_squared(one_over_z * one_over_z); @@ -54,7 +56,7 @@ namespace airy_ai_zero_detail { template - T initial_guess(const int m, const Policy& pol) + BOOST_MATH_GPU_ENABLED T initial_guess(const int m, const Policy& pol) { T guess; @@ -106,11 +108,19 @@ class function_object_ai_and_ai_prime { public: - explicit function_object_ai_and_ai_prime(const Policy& pol) : my_pol(pol) { } + BOOST_MATH_GPU_ENABLED explicit function_object_ai_and_ai_prime(const Policy& pol) : my_pol(pol) { } - function_object_ai_and_ai_prime(const function_object_ai_and_ai_prime&) = default; + #ifdef BOOST_MATH_ENABLE_CUDA + # pragma nv_diag_suppress 20012 + #endif - boost::math::tuple operator()(const T& x) const + BOOST_MATH_GPU_ENABLED function_object_ai_and_ai_prime(const function_object_ai_and_ai_prime&) = default; + + #ifdef BOOST_MATH_ENABLE_CUDA + # pragma nv_diag_default 20012 + #endif + + BOOST_MATH_GPU_ENABLED boost::math::tuple operator()(const T& x) const { // Return a tuple containing both Ai(x) and Ai'(x). return boost::math::make_tuple( @@ -127,7 +137,7 @@ namespace airy_bi_zero_detail { template - T initial_guess(const int m, const Policy& pol) + BOOST_MATH_GPU_ENABLED T initial_guess(const int m, const Policy& pol) { T guess; @@ -179,11 +189,19 @@ class function_object_bi_and_bi_prime { public: - explicit function_object_bi_and_bi_prime(const Policy& pol) : my_pol(pol) { } - - function_object_bi_and_bi_prime(const function_object_bi_and_bi_prime&) = default; - - boost::math::tuple operator()(const T& x) const + BOOST_MATH_GPU_ENABLED explicit function_object_bi_and_bi_prime(const Policy& pol) : my_pol(pol) { } + + #ifdef BOOST_MATH_ENABLE_CUDA + # pragma nv_diag_suppress 20012 + #endif + + BOOST_MATH_GPU_ENABLED function_object_bi_and_bi_prime(const function_object_bi_and_bi_prime&) = default; + + #ifdef BOOST_MATH_ENABLE_CUDA + # pragma nv_diag_default 20012 + #endif + + BOOST_MATH_GPU_ENABLED boost::math::tuple operator()(const T& x) const { // Return a tuple containing both Bi(x) and Bi'(x). return boost::math::make_tuple( diff --git a/include/boost/math/special_functions/detail/bessel_i0.hpp b/include/boost/math/special_functions/detail/bessel_i0.hpp index af6e8c3794..f2219cc940 100644 --- a/include/boost/math/special_functions/detail/bessel_i0.hpp +++ b/include/boost/math/special_functions/detail/bessel_i0.hpp @@ -1,5 +1,6 @@ // Copyright (c) 2006 Xiaogang Zhang // Copyright (c) 2017 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -14,6 +15,9 @@ #include #include #include +#include +#include +#include #if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128) // @@ -35,24 +39,24 @@ namespace boost { namespace math { namespace detail{ template -T bessel_i0(const T& x); +BOOST_MATH_GPU_ENABLED T bessel_i0(const T& x); template -T bessel_i0_imp(const T&, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i0_imp(const T&, const boost::math::integral_constant&) { BOOST_MATH_ASSERT(0); return 0; } template -T bessel_i0_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i0_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x < 7.75) { // Max error in interpolated form: 3.929e-08 // Max Error found at float precision = Poly: 1.991226e-07 - static const float P[] = { + BOOST_MATH_STATIC const float P[] = { 1.00000003928615375e+00f, 2.49999576572179639e-01f, 2.77785268558399407e-02f, @@ -70,7 +74,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) { // Max error in interpolated form: 5.195e-08 // Max Error found at float precision = Poly: 8.502534e-08 - static const float P[] = { + BOOST_MATH_STATIC const float P[] = { 3.98942651588301770e-01f, 4.98327234176892844e-02f, 2.91866904423115499e-02f, @@ -83,7 +87,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) { // Max error in interpolated form: 1.782e-09 // Max Error found at float precision = Poly: 6.473568e-08 - static const float P[] = { + BOOST_MATH_STATIC const float P[] = { 3.98942391532752700e-01f, 4.98455950638200020e-02f, 2.94835666900682535e-02f @@ -96,7 +100,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) } template -T bessel_i0_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i0_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x < 7.75) @@ -104,7 +108,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Bessel I0 over[10 ^ -16, 7.75] // Max error in interpolated form : 3.042e-18 // Max Error found at double precision = Poly : 5.106609e-16 Cheb : 5.239199e-16 - static const double P[] = { + BOOST_MATH_STATIC const double P[] = { 1.00000000000000000e+00, 2.49999999999999909e-01, 2.77777777777782257e-02, @@ -128,7 +132,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) { // Max error in interpolated form : 1.685e-16 // Max Error found at double precision = Poly : 2.575063e-16 Cheb : 2.247615e+00 - static const double P[] = { + BOOST_MATH_STATIC const double P[] = { 3.98942280401425088e-01, 4.98677850604961985e-02, 2.80506233928312623e-02, @@ -158,7 +162,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) { // Max error in interpolated form : 2.437e-18 // Max Error found at double precision = Poly : 1.216719e-16 - static const double P[] = { + BOOST_MATH_STATIC const double P[] = { 3.98942280401432905e-01, 4.98677850491434560e-02, 2.80506308916506102e-02, @@ -173,7 +177,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) } template -T bessel_i0_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i0_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x < 7.75) @@ -182,7 +186,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Max error in interpolated form : 3.899e-20 // Max Error found at float80 precision = Poly : 1.770840e-19 // LCOV_EXCL_START - static const T P[] = { + 
BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 9.99999999999999999961011629e-01), BOOST_MATH_BIG_CONSTANT(T, 64, 2.50000000000000001321873912e-01), BOOST_MATH_BIG_CONSTANT(T, 64, 2.77777777777777703400424216e-02), @@ -211,8 +215,8 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Maximum Relative Change in Control Points : 1.631e-04 // Max Error found at float80 precision = Poly : 7.811948e-21 // LCOV_EXCL_START - static const T Y = 4.051098823547363281250e-01f; - static const T P[] = { + BOOST_MATH_STATIC const T Y = 4.051098823547363281250e-01f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -6.158081780620616479492e-03), BOOST_MATH_BIG_CONSTANT(T, 64, 4.883635969834048766148e-02), BOOST_MATH_BIG_CONSTANT(T, 64, 7.892782002476195771920e-02), @@ -237,8 +241,8 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Maximum Relative Change in Control Points : 1.304e-03 // Max Error found at float80 precision = Poly : 2.303527e-20 // LCOV_EXCL_START - static const T Y = 4.033188819885253906250e-01f; - static const T P[] = { + BOOST_MATH_STATIC const T Y = 4.033188819885253906250e-01f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -4.376373876116109401062e-03), BOOST_MATH_BIG_CONSTANT(T, 64, 4.982899138682911273321e-02), BOOST_MATH_BIG_CONSTANT(T, 64, 3.109477529533515397644e-02), @@ -262,8 +266,8 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Max error in interpolated form: 1.035e-21 // Max Error found at float80 precision = Poly: 1.885872e-21 // LCOV_EXCL_START - static const T Y = 4.011702537536621093750e-01f; - static const T P[] = { + BOOST_MATH_STATIC const T Y = 4.011702537536621093750e-01f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -2.227973351806078464328e-03), BOOST_MATH_BIG_CONSTANT(T, 64, 4.986778486088017419036e-02), BOOST_MATH_BIG_CONSTANT(T, 64, 2.805066823812285310011e-02), @@ -291,7 +295,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Max error in interpolated form : 5.587e-20 // Max Error found at float80 precision = Poly : 8.776852e-20 // LCOV_EXCL_START - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 3.98942280401432677955074061e-01), BOOST_MATH_BIG_CONSTANT(T, 64, 4.98677850501789875615574058e-02), BOOST_MATH_BIG_CONSTANT(T, 64, 2.80506290908675604202206833e-02), @@ -320,7 +324,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) } template -T bessel_i0_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i0_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x < 7.75) @@ -329,7 +333,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Max error in interpolated form : 1.274e-34 // Max Error found at float128 precision = Poly : 3.096091e-34 // LCOV_EXCL_START - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 1.0000000000000000000000000000000001273856e+00), BOOST_MATH_BIG_CONSTANT(T, 113, 2.4999999999999999999999999999999107477496e-01), BOOST_MATH_BIG_CONSTANT(T, 113, 2.7777777777777777777777777777881795230918e-02), @@ -364,7 +368,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Max error in interpolated form : 7.534e-35 // Max Error found at float128 precision = Poly : 6.123912e-34 // LCOV_EXCL_START - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 9.9999999999999999992388573069504617493518e-01), 
BOOST_MATH_BIG_CONSTANT(T, 113, 2.5000000000000000007304739268173096975340e-01), BOOST_MATH_BIG_CONSTANT(T, 113, 2.7777777777777777744261405400543564492074e-02), @@ -403,7 +407,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Max error in interpolated form : 1.808e-34 // Max Error found at float128 precision = Poly : 2.399403e-34 // LCOV_EXCL_START - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 3.9894228040870793650581242239624530714032e-01), BOOST_MATH_BIG_CONSTANT(T, 113, 4.9867780576714783790784348982178607842250e-02), BOOST_MATH_BIG_CONSTANT(T, 113, 2.8051948347934462928487999569249907599510e-02), @@ -445,7 +449,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Max error in interpolated form : 1.487e-34 // Max Error found at float128 precision = Poly : 1.929924e-34 // LCOV_EXCL_START - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 3.9894228040143267793996798658172135362278e-01), BOOST_MATH_BIG_CONSTANT(T, 113, 4.9867785050179084714910130342157246539820e-02), BOOST_MATH_BIG_CONSTANT(T, 113, 2.8050629090725751585266360464766768437048e-02), @@ -480,7 +484,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Max error in interpolated form : 5.459e-35 // Max Error found at float128 precision = Poly : 1.472240e-34 // LCOV_EXCL_START - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 3.9894228040143267793994605993438166526772e-01), BOOST_MATH_BIG_CONSTANT(T, 113, 4.9867785050179084742493257495245185241487e-02), BOOST_MATH_BIG_CONSTANT(T, 113, 2.8050629090725735167652437695397756897920e-02), @@ -507,33 +511,33 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) } template -T bessel_i0_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i0_imp(const T& x, const boost::math::integral_constant&) { if(boost::math::tools::digits() <= 24) - return bessel_i0_imp(x, std::integral_constant()); + return bessel_i0_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 53) - return bessel_i0_imp(x, std::integral_constant()); + return bessel_i0_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 64) - return bessel_i0_imp(x, std::integral_constant()); + return bessel_i0_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 113) - return bessel_i0_imp(x, std::integral_constant()); + return bessel_i0_imp(x, boost::math::integral_constant()); BOOST_MATH_ASSERT(0); return 0; } template -inline T bessel_i0(const T& x) +BOOST_MATH_GPU_ENABLED inline T bessel_i0(const T& x) { - typedef std::integral_constant::digits == 0) || (std::numeric_limits::radix != 2)) ? + typedef boost::math::integral_constant::digits == 0) || (boost::math::numeric_limits::radix != 2)) ? 0 : - std::numeric_limits::digits <= 24 ? + boost::math::numeric_limits::digits <= 24 ? 24 : - std::numeric_limits::digits <= 53 ? + boost::math::numeric_limits::digits <= 53 ? 53 : - std::numeric_limits::digits <= 64 ? + boost::math::numeric_limits::digits <= 64 ? 64 : - std::numeric_limits::digits <= 113 ? + boost::math::numeric_limits::digits <= 113 ? 
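// [editor's note] The rendering of this diff strips everything between
// angle brackets, which is why the precision selector here reads as a
// bare chain of "digits <= N ?" clauses.  Below is a self-contained
// sketch of the dispatch pattern it implements, written against the
// standard-library names; the diff swaps those for boost::math:: shims
// so the same code also compiles under NVRTC, which ships no standard
// headers.  The exact template arguments are inferred from the visible
// 24/53/64/113 ladder, not quoted from the PR.
#include <limits>
#include <type_traits>

// One implementation per precision tier, selected at compile time:
template <class T, int N>
T bessel_like_imp(const T& x, const std::integral_constant<int, N>&)
{
   return x; // stand-in for the N-bit polynomial approximation
}

template <class T>
T bessel_like(const T& x)
{
   typedef std::integral_constant<int,
      ((std::numeric_limits<T>::digits == 0) || (std::numeric_limits<T>::radix != 2)) ?
      0 :
      std::numeric_limits<T>::digits <= 24 ? 24 :   // float
      std::numeric_limits<T>::digits <= 53 ? 53 :   // double
      std::numeric_limits<T>::digits <= 64 ? 64 :   // 80-bit long double
      std::numeric_limits<T>::digits <= 113 ? 113 : // quad precision
      -1> tag_type;
   return bessel_like_imp(x, tag_type()); // resolved with no runtime branch
}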
113 : -1 > tag_type; diff --git a/include/boost/math/special_functions/detail/bessel_i1.hpp b/include/boost/math/special_functions/detail/bessel_i1.hpp index badc35de0b..d2c750df06 100644 --- a/include/boost/math/special_functions/detail/bessel_i1.hpp +++ b/include/boost/math/special_functions/detail/bessel_i1.hpp @@ -1,4 +1,5 @@ // Copyright (c) 2017 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -17,9 +18,13 @@ #pragma once #endif +#include #include #include #include +#include +#include +#include #if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128) // @@ -38,24 +43,24 @@ namespace boost { namespace math { namespace detail{ template -T bessel_i1(const T& x); +BOOST_MATH_GPU_ENABLED T bessel_i1(const T& x); template -T bessel_i1_imp(const T&, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i1_imp(const T&, const boost::math::integral_constant&) { BOOST_MATH_ASSERT(0); return 0; } template -T bessel_i1_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i1_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x < 7.75) { //Max error in interpolated form : 1.348e-08 // Max Error found at float precision = Poly : 1.469121e-07 - static const float P[] = { + BOOST_MATH_STATIC const float P[] = { 8.333333221e-02f, 6.944453712e-03f, 3.472097211e-04f, @@ -74,7 +79,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Max error in interpolated form: 9.000e-08 // Max Error found at float precision = Poly: 1.044345e-07 - static const float P[] = { + BOOST_MATH_STATIC const float P[] = { 3.98942115977513013e-01f, -1.49581264836620262e-01f, -4.76475741878486795e-02f, @@ -89,7 +94,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) } template -T bessel_i1_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i1_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x < 7.75) @@ -98,7 +103,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Max error in interpolated form: 5.639e-17 // Max Error found at double precision = Poly: 1.795559e-16 - static const double P[] = { + BOOST_MATH_STATIC const double P[] = { 8.333333333333333803e-02, 6.944444444444341983e-03, 3.472222222225921045e-04, @@ -122,7 +127,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Max error in interpolated form: 1.796e-16 // Max Error found at double precision = Poly: 2.898731e-16 - static const double P[] = { + BOOST_MATH_STATIC const double P[] = { 3.989422804014406054e-01, -1.496033551613111533e-01, -4.675104253598537322e-02, @@ -152,7 +157,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) { // Max error in interpolated form: 1.320e-19 // Max Error found at double precision = Poly: 7.065357e-17 - static const double P[] = { + BOOST_MATH_STATIC const double P[] = { 3.989422804014314820e-01, -1.496033551467584157e-01, -4.675105322571775911e-02, @@ -167,7 +172,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) } template -T bessel_i1_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i1_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x < 7.75) @@ -175,7 +180,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Bessel I0 over[10 ^ -16, 7.75] // Max error in 
interpolated form: 8.086e-21 // Max Error found at float80 precision = Poly: 7.225090e-20 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 8.33333333333333333340071817e-02), BOOST_MATH_BIG_CONSTANT(T, 64, 6.94444444444444442462728070e-03), BOOST_MATH_BIG_CONSTANT(T, 64, 3.47222222222222318886683883e-04), @@ -203,7 +208,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Maximum Deviation Found : 3.887e-20 // Expected Error Term : 3.887e-20 // Maximum Relative Change in Control Points : 1.681e-04 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 3.98942260530218897338680e-01), BOOST_MATH_BIG_CONSTANT(T, 64, -1.49599542849073670179540e-01), BOOST_MATH_BIG_CONSTANT(T, 64, -4.70492865454119188276875e-02), @@ -236,7 +241,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Maximum Relative Change in Control Points : 2.101e-03 // Max Error found at float80 precision = Poly : 6.029974e-20 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 3.98942280401431675205845e-01), BOOST_MATH_BIG_CONSTANT(T, 64, -1.49603355149968887210170e-01), BOOST_MATH_BIG_CONSTANT(T, 64, -4.67510486284376330257260e-02), @@ -258,7 +263,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Bessel I0 over[100, INF] // Max error in interpolated form: 2.456e-20 // Max Error found at float80 precision = Poly: 5.446356e-20 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 3.98942280401432677958445e-01), BOOST_MATH_BIG_CONSTANT(T, 64, -1.49603355150537411254359e-01), BOOST_MATH_BIG_CONSTANT(T, 64, -4.67510484842456251368526e-02), @@ -276,7 +281,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) } template -T bessel_i1_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i1_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x < 7.75) @@ -285,7 +290,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Max error in interpolated form: 1.835e-35 // Max Error found at float128 precision = Poly: 1.645036e-34 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 8.3333333333333333333333333333333331804098e-02), BOOST_MATH_BIG_CONSTANT(T, 113, 6.9444444444444444444444444444445418303082e-03), BOOST_MATH_BIG_CONSTANT(T, 113, 3.4722222222222222222222222222119082346591e-04), @@ -321,7 +326,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Maximum Relative Change in Control Points : 5.204e-03 // Max Error found at float128 precision = Poly : 2.882561e-34 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 8.333333333333333326889717360850080939e-02), BOOST_MATH_BIG_CONSTANT(T, 113, 6.944444444444444511272790848815114507e-03), BOOST_MATH_BIG_CONSTANT(T, 113, 3.472222222222221892451965054394153443e-04), @@ -355,7 +360,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Maximum Deviation Found : 1.766e-35 // Expected Error Term : 1.021e-35 // Maximum Relative Change in Control Points : 6.228e-03 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 8.333333333333255774414858563409941233e-02), BOOST_MATH_BIG_CONSTANT(T, 113, 6.944444444444897867884955912228700291e-03), BOOST_MATH_BIG_CONSTANT(T, 113, 3.472222222220954970397343617150959467e-04), @@ -389,7 +394,7 @@ T bessel_i1_imp(const T& x, const 
std::integral_constant&) { // Max error in interpolated form: 8.864e-36 // Max Error found at float128 precision = Poly: 8.522841e-35 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 3.989422793693152031514179994954750043e-01), BOOST_MATH_BIG_CONSTANT(T, 113, -1.496029423752889591425633234009799670e-01), BOOST_MATH_BIG_CONSTANT(T, 113, -4.682975926820553021482820043377990241e-02), @@ -421,7 +426,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Max error in interpolated form: 6.028e-35 // Max Error found at float128 precision = Poly: 1.368313e-34 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 3.989422804012941975429616956496046931e-01), BOOST_MATH_BIG_CONSTANT(T, 113, -1.496033550576049830976679315420681402e-01), BOOST_MATH_BIG_CONSTANT(T, 113, -4.675107835141866009896710750800622147e-02), @@ -456,7 +461,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Max error in interpolated form: 5.494e-35 // Max Error found at float128 precision = Poly: 1.214651e-34 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 3.989422804014326779399307367861631577e-01), BOOST_MATH_BIG_CONSTANT(T, 113, -1.496033551505372542086590873271571919e-01), BOOST_MATH_BIG_CONSTANT(T, 113, -4.675104848454290286276466276677172664e-02), @@ -486,7 +491,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Bessel I0 over[100, INF] // Max error in interpolated form: 6.081e-35 // Max Error found at float128 precision = Poly: 1.407151e-34 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 3.9894228040143267793994605993438200208417e-01), BOOST_MATH_BIG_CONSTANT(T, 113, -1.4960335515053725422747977247811372936584e-01), BOOST_MATH_BIG_CONSTANT(T, 113, -4.6751048484542891946087411826356811991039e-02), @@ -512,33 +517,33 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) } template -T bessel_i1_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i1_imp(const T& x, const boost::math::integral_constant&) { if(boost::math::tools::digits() <= 24) - return bessel_i1_imp(x, std::integral_constant()); + return bessel_i1_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 53) - return bessel_i1_imp(x, std::integral_constant()); + return bessel_i1_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 64) - return bessel_i1_imp(x, std::integral_constant()); + return bessel_i1_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 113) - return bessel_i1_imp(x, std::integral_constant()); + return bessel_i1_imp(x, boost::math::integral_constant()); BOOST_MATH_ASSERT(0); return 0; } template -inline T bessel_i1(const T& x) +inline BOOST_MATH_GPU_ENABLED T bessel_i1(const T& x) { - typedef std::integral_constant::digits == 0) || (std::numeric_limits::radix != 2)) ? + typedef boost::math::integral_constant::digits == 0) || (boost::math::numeric_limits::radix != 2)) ? 0 : - std::numeric_limits::digits <= 24 ? + boost::math::numeric_limits::digits <= 24 ? 24 : - std::numeric_limits::digits <= 53 ? + boost::math::numeric_limits::digits <= 53 ? 53 : - std::numeric_limits::digits <= 64 ? + boost::math::numeric_limits::digits <= 64 ? 64 : - std::numeric_limits::digits <= 113 ? + boost::math::numeric_limits::digits <= 113 ? 
113 : -1 > tag_type; diff --git a/include/boost/math/special_functions/detail/bessel_ik.hpp b/include/boost/math/special_functions/detail/bessel_ik.hpp index 0c653b4753..b3e7378fd4 100644 --- a/include/boost/math/special_functions/detail/bessel_ik.hpp +++ b/include/boost/math/special_functions/detail/bessel_ik.hpp @@ -1,4 +1,5 @@ // Copyright (c) 2006 Xiaogang Zhang +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,14 +11,17 @@ #pragma once #endif -#include -#include +#include +#include +#include +#include +#include +#include #include #include #include #include #include -#include // Modified Bessel functions of the first and second kind of fractional order @@ -30,13 +34,13 @@ struct cyl_bessel_i_small_z { typedef T result_type; - cyl_bessel_i_small_z(T v_, T z_) : k(0), v(v_), mult(z_*z_/4) + BOOST_MATH_GPU_ENABLED cyl_bessel_i_small_z(T v_, T z_) : k(0), v(v_), mult(z_*z_/4) { BOOST_MATH_STD_USING term = 1; } - T operator()() + BOOST_MATH_GPU_ENABLED T operator()() { T result = term; ++k; @@ -52,7 +56,7 @@ struct cyl_bessel_i_small_z }; template -inline T bessel_i_small_z_series(T v, T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T bessel_i_small_z_series(T v, T x, const Policy& pol) { BOOST_MATH_STD_USING T prefix; @@ -69,7 +73,7 @@ inline T bessel_i_small_z_series(T v, T x, const Policy& pol) return prefix; cyl_bessel_i_small_z s(v, x); - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_series_iterations(); T result = boost::math::tools::sum_series(s, boost::math::policies::get_epsilon(), max_iter); @@ -80,7 +84,7 @@ inline T bessel_i_small_z_series(T v, T x, const Policy& pol) // Calculate K(v, x) and K(v+1, x) by method analogous to // Temme, Journal of Computational Physics, vol 21, 343 (1976) template -int temme_ik(T v, T x, T* result_K, T* K1, const Policy& pol) +BOOST_MATH_GPU_ENABLED int temme_ik(T v, T x, T* result_K, T* K1, const Policy& pol) { T f, h, p, q, coef, sum, sum1, tolerance; T a, b, c, d, sigma, gamma1, gamma2; @@ -157,7 +161,7 @@ int temme_ik(T v, T x, T* result_K, T* K1, const Policy& pol) // Evaluate continued fraction fv = I_(v+1) / I_v, derived from // Abramowitz and Stegun, Handbook of Mathematical Functions, 1972, 9.1.73 template -int CF1_ik(T v, T x, T* fv, const Policy& pol) +BOOST_MATH_GPU_ENABLED int CF1_ik(T v, T x, T* fv, const Policy& pol) { T C, D, f, a, b, delta, tiny, tolerance; unsigned long k; @@ -204,7 +208,7 @@ int CF1_ik(T v, T x, T* fv, const Policy& pol) // z1 / z0 = U(v+1.5, 2v+1, 2x) / U(v+0.5, 2v+1, 2x), see // Thompson and Barnett, Computer Physics Communications, vol 47, 245 (1987) template -int CF2_ik(T v, T x, T* Kv, T* Kv1, const Policy& pol) +BOOST_MATH_GPU_ENABLED int CF2_ik(T v, T x, T* Kv, T* Kv1, const Policy& pol) { BOOST_MATH_STD_USING using namespace boost::math::constants; @@ -297,7 +301,7 @@ enum{ // Compute I(v, x) and K(v, x) simultaneously by Temme's method, see // Temme, Journal of Computational Physics, vol 19, 324 (1975) template -int bessel_ik(T v, T x, T* result_I, T* result_K, int kind, const Policy& pol) +BOOST_MATH_GPU_ENABLED int bessel_ik(T v, T x, T* result_I, T* result_K, int kind, const Policy& pol) { // Kv1 = K_(v+1), fv = I_(v+1) / I_v // Ku1 = K_(u+1), fu = I_(u+1) / I_u @@ -314,7 +318,7 @@ int bessel_ik(T v, T x, T* result_I, T* result_K, int kind, 
const Policy& pol) using namespace boost::math::tools; using namespace boost::math::constants; - static const char* function = "boost::math::bessel_ik<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::bessel_ik<%1%>(%1%,%1%)"; if (v < 0) { @@ -329,7 +333,7 @@ int bessel_ik(T v, T x, T* result_I, T* result_K, int kind, const Policy& pol) if (((kind & need_i) == 0) && (fabs(4 * v * v - 25) / (8 * x) < tools::forth_root_epsilon())) { // A&S 9.7.2 - Iv = std::numeric_limits::quiet_NaN(); // any value will do + Iv = boost::math::numeric_limits::quiet_NaN(); // any value will do T mu = 4 * v * v; T eight_z = 8 * x; Kv = 1 + (mu - 1) / eight_z + (mu - 1) * (mu - 9) / (2 * eight_z * eight_z) + (mu - 1) * (mu - 9) * (mu - 25) / (6 * eight_z * eight_z * eight_z); @@ -410,7 +414,7 @@ int bessel_ik(T v, T x, T* result_I, T* result_K, int kind, const Policy& pol) } } else - Iv = std::numeric_limits::quiet_NaN(); // any value will do + Iv = boost::math::numeric_limits::quiet_NaN(); // any value will do } if (reflect) { diff --git a/include/boost/math/special_functions/detail/bessel_j0.hpp b/include/boost/math/special_functions/detail/bessel_j0.hpp index 9a0b26fe6b..2df027b21d 100644 --- a/include/boost/math/special_functions/detail/bessel_j0.hpp +++ b/include/boost/math/special_functions/detail/bessel_j0.hpp @@ -10,6 +10,7 @@ #pragma once #endif +#include #include #include #include @@ -32,10 +33,10 @@ namespace boost { namespace math { namespace detail{ template -T bessel_j0(T x); +BOOST_MATH_GPU_ENABLED T bessel_j0(T x); template -T bessel_j0(T x) +BOOST_MATH_GPU_ENABLED T bessel_j0(T x) { #ifdef BOOST_MATH_INSTRUMENT static bool b = false; @@ -48,7 +49,7 @@ T bessel_j0(T x) } #endif - static const T P1[] = { + BOOST_MATH_STATIC const T P1[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -4.1298668500990866786e+11)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.7282507878605942706e+10)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -6.2140700423540120665e+08)), @@ -57,7 +58,7 @@ T bessel_j0(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0344222815443188943e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.2117036164593528341e-01)) }; - static const T Q1[] = { + BOOST_MATH_STATIC const T Q1[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.3883787996332290397e+12)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.6328198300859648632e+10)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.3985097372263433271e+08)), @@ -66,7 +67,7 @@ T bessel_j0(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.0)) }; - static const T P2[] = { + BOOST_MATH_STATIC const T P2[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.8319397969392084011e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.2254078161378989535e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -7.2879702464464618998e+03)), @@ -76,7 +77,7 @@ T bessel_j0(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 7.4321196680624245801e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.8591703355916499363e+01)) }; - static const T Q2[] = { + BOOST_MATH_STATIC const T Q2[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -3.5783478026152301072e+05)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.4599102262586308984e+05)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -8.4055062591169562211e+04)), @@ -86,7 +87,7 @@ T bessel_j0(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -2.5258076240801555057e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)) }; - static const T PC[] = { + BOOST_MATH_STATIC const T PC[] = { 
static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.2779090197304684302e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.1345386639580765797e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.1170523380864944322e+04)), @@ -94,7 +95,7 @@ T bessel_j0(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.5376201909008354296e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.8961548424210455236e-01)) }; - static const T QC[] = { + BOOST_MATH_STATIC const T QC[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.2779090197304684318e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.1370412495510416640e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.1215350561880115730e+04)), @@ -102,7 +103,7 @@ T bessel_j0(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.5711159858080893649e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)) }; - static const T PS[] = { + BOOST_MATH_STATIC const T PS[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -8.9226600200800094098e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.8591953644342993800e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.1183429920482737611e+02)), @@ -110,7 +111,7 @@ T bessel_j0(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.2441026745835638459e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -8.8033303048680751817e-03)) }; - static const T QS[] = { + BOOST_MATH_STATIC const T QS[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.7105024128512061905e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.1951131543434613647e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 7.2642780169211018836e+03)), @@ -118,12 +119,13 @@ T bessel_j0(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 9.0593769594993125859e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)) }; - static const T x1 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.4048255576957727686e+00)), - x2 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.5200781102863106496e+00)), - x11 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 6.160e+02)), - x12 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.42444230422723137837e-03)), - x21 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.4130e+03)), - x22 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.46860286310649596604e-04)); + + BOOST_MATH_STATIC const T x1 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.4048255576957727686e+00)); + BOOST_MATH_STATIC const T x2 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.5200781102863106496e+00)); + BOOST_MATH_STATIC const T x11 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 6.160e+02)); + BOOST_MATH_STATIC const T x12 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.42444230422723137837e-03)); + BOOST_MATH_STATIC const T x21 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.4130e+03)); + BOOST_MATH_STATIC const T x22 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.46860286310649596604e-04)); T value, factor, r, rc, rs; diff --git a/include/boost/math/special_functions/detail/bessel_j1.hpp b/include/boost/math/special_functions/detail/bessel_j1.hpp index 6d354dcce7..43df9fa0c1 100644 --- a/include/boost/math/special_functions/detail/bessel_j1.hpp +++ b/include/boost/math/special_functions/detail/bessel_j1.hpp @@ -10,6 +10,7 @@ #pragma once #endif +#include #include #include #include @@ -32,27 +33,29 @@ namespace boost { namespace math{ namespace detail{ template -T bessel_j1(T x); +BOOST_MATH_GPU_ENABLED T bessel_j1(T x); template struct bessel_j1_initializer { struct init { - init() + BOOST_MATH_GPU_ENABLED init() { do_init(); } - static void do_init() + BOOST_MATH_GPU_ENABLED static void do_init() { 
bessel_j1(T(1)); } - void force_instantiate()const{} + BOOST_MATH_GPU_ENABLED void force_instantiate()const{} }; - static const init initializer; - static void force_instantiate() + BOOST_MATH_STATIC const init initializer; + BOOST_MATH_GPU_ENABLED static void force_instantiate() { + #ifndef BOOST_MATH_HAS_GPU_SUPPORT initializer.force_instantiate(); + #endif } }; @@ -60,11 +63,11 @@ template const typename bessel_j1_initializer::init bessel_j1_initializer::initializer; template -T bessel_j1(T x) +BOOST_MATH_GPU_ENABLED T bessel_j1(T x) { bessel_j1_initializer::force_instantiate(); - static const T P1[] = { + BOOST_MATH_STATIC const T P1[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.4258509801366645672e+11)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 6.6781041261492395835e+09)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.1548696764841276794e+08)), @@ -73,7 +76,7 @@ T bessel_j1(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0650724020080236441e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.0767857011487300348e-02)) }; - static const T Q1[] = { + BOOST_MATH_STATIC const T Q1[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.1868604460820175290e+12)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.2091902282580133541e+10)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.0228375140097033958e+08)), @@ -82,7 +85,7 @@ T bessel_j1(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.0)) }; - static const T P2[] = { + BOOST_MATH_STATIC const T P2[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.7527881995806511112e+16)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.6608531731299018674e+15)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -3.6658018905416665164e+13)), @@ -92,7 +95,7 @@ T bessel_j1(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -7.5023342220781607561e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.6179191852758252278e+00)) }; - static const T Q2[] = { + BOOST_MATH_STATIC const T Q2[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.7253905888447681194e+18)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.7128800897135812012e+16)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.4899346165481429307e+13)), @@ -102,7 +105,7 @@ T bessel_j1(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.3886978985861357615e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)) }; - static const T PC[] = { + BOOST_MATH_STATIC const T PC[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -4.4357578167941278571e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -9.9422465050776411957e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -6.6033732483649391093e+06)), @@ -111,7 +114,7 @@ T bessel_j1(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.6116166443246101165e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.0)) }; - static const T QC[] = { + BOOST_MATH_STATIC const T QC[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -4.4357578167941278568e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -9.9341243899345856590e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -6.5853394797230870728e+06)), @@ -120,7 +123,7 @@ T bessel_j1(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.4550094401904961825e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)) }; - static const T PS[] = { + BOOST_MATH_STATIC const T PS[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.3220913409857223519e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.5145160675335701966e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 6.6178836581270835179e+04)), @@ -129,7 +132,7 
@@ T bessel_j1(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.5265133846636032186e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.0)) }; - static const T QS[] = { + BOOST_MATH_STATIC const T QS[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 7.0871281941028743574e+05)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.8194580422439972989e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.4194606696037208929e+06)), @@ -138,12 +141,13 @@ T bessel_j1(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.6383677696049909675e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)) }; - static const T x1 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.8317059702075123156e+00)), - x2 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 7.0155866698156187535e+00)), - x11 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 9.810e+02)), - x12 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -3.2527979248768438556e-04)), - x21 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.7960e+03)), - x22 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -3.8330184381246462950e-05)); + + BOOST_MATH_STATIC const T x1 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.8317059702075123156e+00)); + BOOST_MATH_STATIC const T x2 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 7.0155866698156187535e+00)); + BOOST_MATH_STATIC const T x11 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 9.810e+02)); + BOOST_MATH_STATIC const T x12 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -3.2527979248768438556e-04)); + BOOST_MATH_STATIC const T x21 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.7960e+03)); + BOOST_MATH_STATIC const T x22 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -3.8330184381246462950e-05)); T value, factor, r, rc, rs, w; diff --git a/include/boost/math/special_functions/detail/bessel_jn.hpp b/include/boost/math/special_functions/detail/bessel_jn.hpp index a08af05485..73bc0c5621 100644 --- a/include/boost/math/special_functions/detail/bessel_jn.hpp +++ b/include/boost/math/special_functions/detail/bessel_jn.hpp @@ -10,6 +10,10 @@ #pragma once #endif +#include +#include +#include +#include #include #include #include @@ -24,7 +28,7 @@ namespace boost { namespace math { namespace detail{ template -T bessel_jn(int n, T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED T bessel_jn(int n, T x, const Policy& pol) { T value(0), factor, current, prev, next; diff --git a/include/boost/math/special_functions/detail/bessel_jy.hpp b/include/boost/math/special_functions/detail/bessel_jy.hpp index 90e099eb77..143dce872c 100644 --- a/include/boost/math/special_functions/detail/bessel_jy.hpp +++ b/include/boost/math/special_functions/detail/bessel_jy.hpp @@ -11,16 +11,18 @@ #endif #include +#include +#include #include #include #include #include #include +#include #include #include #include #include -#include // Bessel functions of the first and second kind of fractional order @@ -38,7 +40,7 @@ namespace boost { namespace math { // try it and see... 
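// [editor's note] For the reader: hankel_PQ below computes the P and Q
// sums of the standard large-x asymptotic form (A&S section 9.2; the
// exact equation numbers are hedged from memory):
//
//    J_v(x) ~ sqrt(2/(pi x)) * (P(v,x) cos(chi) - Q(v,x) sin(chi))
//    Y_v(x) ~ sqrt(2/(pi x)) * (P(v,x) sin(chi) + Q(v,x) cos(chi))
//    with chi = x - (v/2 + 1/4) * pi,
//
// writing the truncated sums through *p and *q.  Judging by the call
// site further down ("(x > 8) && hankel_PQ(...)"), the bool return
// reports whether the expansion actually converged at this x.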
// template - bool hankel_PQ(T v, T x, T* p, T* q, const Policy& ) + BOOST_MATH_GPU_ENABLED bool hankel_PQ(T v, T x, T* p, T* q, const Policy& ) { BOOST_MATH_STD_USING T tolerance = 2 * policies::get_epsilon(); @@ -70,7 +72,7 @@ namespace boost { namespace math { // Calculate Y(v, x) and Y(v+1, x) by Temme's method, see // Temme, Journal of Computational Physics, vol 21, 343 (1976) template - int temme_jy(T v, T x, T* Y, T* Y1, const Policy& pol) + BOOST_MATH_GPU_ENABLED int temme_jy(T v, T x, T* Y, T* Y1, const Policy& pol) { T g, h, p, q, f, coef, sum, sum1, tolerance; T a, d, e, sigma; @@ -139,7 +141,7 @@ namespace boost { namespace math { // Evaluate continued fraction fv = J_(v+1) / J_v, see // Abramowitz and Stegun, Handbook of Mathematical Functions, 1972, 9.1.73 template - int CF1_jy(T v, T x, T* fv, int* sign, const Policy& pol) + BOOST_MATH_GPU_ENABLED int CF1_jy(T v, T x, T* fv, int* sign, const Policy& pol) { T C, D, f, a, b, delta, tiny, tolerance; unsigned long k; @@ -185,7 +187,7 @@ namespace boost { namespace math { // real values only. // template - int CF2_jy(T v, T x, T* p, T* q, const Policy& pol) + BOOST_MATH_GPU_ENABLED int CF2_jy(T v, T x, T* p, T* q, const Policy& pol) { BOOST_MATH_STD_USING @@ -254,13 +256,13 @@ namespace boost { namespace math { return 0; } - static const int need_j = 1; - static const int need_y = 2; + BOOST_MATH_STATIC const int need_j = 1; + BOOST_MATH_STATIC const int need_y = 2; // Compute J(v, x) and Y(v, x) simultaneously by Steed's method, see // Barnett et al, Computer Physics Communications, vol 8, 377 (1974) template - int bessel_jy(T v, T x, T* J, T* Y, int kind, const Policy& pol) + BOOST_MATH_GPU_ENABLED int bessel_jy(T v, T x, T* J, T* Y, int kind, const Policy& pol) { BOOST_MATH_ASSERT(x >= 0); @@ -273,7 +275,7 @@ namespace boost { namespace math { T cp = 0; T sp = 0; - static const char* function = "boost::math::bessel_jy<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::bessel_jy<%1%>(%1%,%1%)"; BOOST_MATH_STD_USING using namespace boost::math::tools; @@ -284,7 +286,7 @@ namespace boost { namespace math { reflect = true; v = -v; // v is non-negative from here } - if (v > static_cast((std::numeric_limits::max)())) + if (v > static_cast((boost::math::numeric_limits::max)())) { *J = *Y = policies::raise_evaluation_error(function, "Order of Bessel function is too large to evaluate: got %1%", v, pol); return 1; // LCOV_EXCL_LINE previous line will throw. @@ -310,10 +312,10 @@ namespace boost { namespace math { else if(kind & need_j) *J = policies::raise_domain_error(function, "Value of Bessel J_v(x) is complex-infinity at %1%", x, pol); // complex infinity else - *J = std::numeric_limits::quiet_NaN(); // LCOV_EXCL_LINE, we should never get here, any value will do, not using J. + *J = boost::math::numeric_limits::quiet_NaN(); // LCOV_EXCL_LINE, we should never get here, any value will do, not using J. if((kind & need_y) == 0) - *Y = std::numeric_limits::quiet_NaN(); // any value will do, not using Y. + *Y = boost::math::numeric_limits::quiet_NaN(); // any value will do, not using Y. 
         else
         {
            // We should never get here:
@@ -333,7 +335,7 @@ namespace boost { namespace math {
            // and divergent which leads to large errors :-(
            //
            Jv = bessel_j_small_z_series(v, x, pol);
-           Yv = std::numeric_limits<T>::quiet_NaN();
+           Yv = boost::math::numeric_limits<T>::quiet_NaN();
         }
         else if((x < 1) && (u != 0) && (log(policies::get_epsilon<T, Policy>() / 2) > v * log((x/2) * (x/2) / v)))
         {
@@ -344,7 +346,7 @@ namespace boost { namespace math {
            if(kind&need_j)
               Jv = bessel_j_small_z_series(v, x, pol);
            else
-              Jv = std::numeric_limits<T>::quiet_NaN();
+              Jv = boost::math::numeric_limits<T>::quiet_NaN();
            if((org_kind&need_y && (!reflect || (cp != 0))) || (org_kind & need_j && (reflect && (sp != 0))))
            {
@@ -352,7 +354,7 @@ namespace boost { namespace math {
               Yv = bessel_y_small_z_series(v, x, &Yv_scale, pol);
            }
            else
-              Yv = std::numeric_limits<T>::quiet_NaN();
+              Yv = boost::math::numeric_limits<T>::quiet_NaN();
         }
         else if((u == 0) && (x < policies::get_epsilon<T, Policy>()))
         {
@@ -363,7 +365,7 @@ namespace boost { namespace math {
            if(kind&need_j)
               Jv = bessel_j_small_z_series(v, x, pol);
            else
-              Jv = std::numeric_limits<T>::quiet_NaN();
+              Jv = boost::math::numeric_limits<T>::quiet_NaN();
            if((org_kind&need_y && (!reflect || (cp != 0))) || (org_kind & need_j && (reflect && (sp != 0))))
            {
@@ -371,7 +373,7 @@ namespace boost { namespace math {
               Yv = bessel_yn_small_z(n, x, &Yv_scale, pol);
            }
            else
-              Yv = std::numeric_limits<T>::quiet_NaN();
+              Yv = boost::math::numeric_limits<T>::quiet_NaN();
            // LCOV_EXCL_STOP
         }
         else if(asymptotic_bessel_large_x_limit(v, x))
@@ -381,13 +383,13 @@ namespace boost { namespace math {
               Yv = asymptotic_bessel_y_large_x_2(v, x, pol);
            }
            else
-              Yv = std::numeric_limits<T>::quiet_NaN(); // any value will do, we're not using it.
+              Yv = boost::math::numeric_limits<T>::quiet_NaN(); // any value will do, we're not using it.
            if(kind&need_j)
            {
               Jv = asymptotic_bessel_j_large_x_2(v, x, pol);
            }
            else
-              Jv = std::numeric_limits<T>::quiet_NaN(); // any value will do, we're not using it.
+              Jv = boost::math::numeric_limits<T>::quiet_NaN(); // any value will do, we're not using it.
         }
         else if((x > 8) && hankel_PQ(v, x, &p, &q, pol))
         {
@@ -449,7 +451,7 @@ namespace boost { namespace math {
               Jv = scale * W / (Yv * fv - Yv1); // Wronskian relation
            }
            else
-              Jv = std::numeric_limits<T>::quiet_NaN(); // any value will do, we're not using it.
+              Jv = boost::math::numeric_limits<T>::quiet_NaN(); // any value will do, we're not using it.
            Yv_scale = scale;
         }
         else // x in (2, \infty)
@@ -564,7 +566,7 @@ namespace boost { namespace math {
               Yv = prev;
            }
            else
-              Yv = std::numeric_limits<T>::quiet_NaN(); // any value will do, we're not using it.
+              Yv = boost::math::numeric_limits<T>::quiet_NaN(); // any value will do, we're not using it.
         }
         if (reflect)
diff --git a/include/boost/math/special_functions/detail/bessel_jy_asym.hpp b/include/boost/math/special_functions/detail/bessel_jy_asym.hpp
index cb09b202d5..51e4efafca 100644
--- a/include/boost/math/special_functions/detail/bessel_jy_asym.hpp
+++ b/include/boost/math/special_functions/detail/bessel_jy_asym.hpp
@@ -16,12 +16,15 @@
 #pragma once
 #endif

+#include
+#include
 #include
+#include

 namespace boost{ namespace math{ namespace detail{

 template <class T>
-inline T asymptotic_bessel_amplitude(T v, T x)
+BOOST_MATH_GPU_ENABLED inline T asymptotic_bessel_amplitude(T v, T x)
 {
    // Calculate the amplitude of J(v, x) and Y(v, x) for large
    // x: see A&S 9.2.28.
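// [editor's note] Context for the amplitude/phase pair implemented in
// this header: writing J_v(x) = M_v(x) cos(theta_v(x)) and
// Y_v(x) = M_v(x) sin(theta_v(x)) gives the modulus
//
//    M_v(x)^2 = J_v(x)^2 + Y_v(x)^2,  with M_v(x)^2 -> 2/(pi x) as x -> inf,
//
// and asymptotic_bessel_phase_mx below returns theta_v(x) - x, which is
// O(1) for large x and so sidesteps the cancellation that computing
// theta_v directly would suffer.  (Reading inferred from the function
// names and comments; the full bodies fall outside these hunks.)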
@@ -39,7 +42,7 @@ inline T asymptotic_bessel_amplitude(T v, T x)
 }

 template <class T>
-T asymptotic_bessel_phase_mx(T v, T x)
+BOOST_MATH_GPU_ENABLED T asymptotic_bessel_phase_mx(T v, T x)
 {
    //
    // Calculate the phase of J(v, x) and Y(v, x) for large x.
@@ -63,7 +66,7 @@ T asymptotic_bessel_phase_mx(T v, T x)
 }

 template <class T, class Policy>
-inline T asymptotic_bessel_y_large_x_2(T v, T x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T asymptotic_bessel_y_large_x_2(T v, T x, const Policy& pol)
 {
    // See A&S 9.2.19.
    BOOST_MATH_STD_USING
@@ -93,7 +96,7 @@ inline T asymptotic_bessel_y_large_x_2(T v, T x, const Policy& pol)
 }

 template <class T, class Policy>
-inline T asymptotic_bessel_j_large_x_2(T v, T x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T asymptotic_bessel_j_large_x_2(T v, T x, const Policy& pol)
 {
    // See A&S 9.2.19.
    BOOST_MATH_STD_USING
@@ -124,7 +127,7 @@ inline T asymptotic_bessel_j_large_x_2(T v, T x, const Policy& pol)
 }

 template <class T>
-inline bool asymptotic_bessel_large_x_limit(int v, const T& x)
+BOOST_MATH_GPU_ENABLED inline bool asymptotic_bessel_large_x_limit(int v, const T& x)
 {
    BOOST_MATH_STD_USING
    //
@@ -142,7 +145,7 @@ inline bool asymptotic_bessel_large_x_limit(const T& v, const T& x)
 }

 template <class T>
-inline bool asymptotic_bessel_large_x_limit(const T& v, const T& x)
+BOOST_MATH_GPU_ENABLED inline bool asymptotic_bessel_large_x_limit(const T& v, const T& x)
 {
    BOOST_MATH_STD_USING
    //
@@ -155,11 +158,11 @@ inline bool asymptotic_bessel_large_x_limit(const T& v, const T& x)
    // error rates either side of the divide for v < 10000.
    // At double precision eps^1/8 ~= 0.01.
    //
-   return (std::max)(T(fabs(v)), T(1)) < x * sqrt(tools::forth_root_epsilon<T>());
+   return BOOST_MATH_GPU_SAFE_MAX(T(fabs(v)), T(1)) < x * sqrt(tools::forth_root_epsilon<T>());
 }

 template <class T, class Policy>
-void temme_asymptotic_y_small_x(T v, T x, T* Y, T* Y1, const Policy& pol)
+BOOST_MATH_GPU_ENABLED void temme_asymptotic_y_small_x(T v, T x, T* Y, T* Y1, const Policy& pol)
 {
    T c = 1;
    T p = (v / boost::math::sin_pi(v, pol)) * pow(x / 2, -v) / boost::math::tgamma(1 - v, pol);
@@ -193,7 +196,7 @@ void temme_asymptotic_y_small_x(T v, T x, T* Y, T* Y1, const Policy& pol)
 }

 template <class T, class Policy>
-T asymptotic_bessel_i_large_x(T v, T x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T asymptotic_bessel_i_large_x(T v, T x, const Policy& pol)
 {
    BOOST_MATH_STD_USING  // ADL of std names
    T s = 1;
diff --git a/include/boost/math/special_functions/detail/bessel_jy_series.hpp b/include/boost/math/special_functions/detail/bessel_jy_series.hpp
index db46f36400..5c083f3483 100644
--- a/include/boost/math/special_functions/detail/bessel_jy_series.hpp
+++ b/include/boost/math/special_functions/detail/bessel_jy_series.hpp
@@ -10,10 +10,9 @@
 #pragma once
 #endif

-#include
-#include
 #include
 #include
+#include

 namespace boost { namespace math { namespace detail{

@@ -22,7 +21,7 @@ struct bessel_j_small_z_series_term
 {
    typedef T result_type;

-   bessel_j_small_z_series_term(T v_, T x)
+   BOOST_MATH_GPU_ENABLED bessel_j_small_z_series_term(T v_, T x)
       : N(0), v(v_)
    {
       BOOST_MATH_STD_USING
@@ -30,7 +29,7 @@ struct bessel_j_small_z_series_term
       mult *= -mult;
       term = 1;
    }
-   T operator()()
+   BOOST_MATH_GPU_ENABLED T operator()()
    {
       T r = term;
       ++N;
@@ -49,7 +48,7 @@
 // Converges rapidly for all z << v.
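// [editor's note] A minimal stand-alone rendering of the series summed
// by bessel_j_small_z_series below, using plain std:: names so it runs
// outside Boost.  The recurrence matches the term functor above
// (mult = -z*z/4, term_{N+1} = term_N * mult / (N * (N + v))); the
// tolerance and iteration handling are illustrative stand-ins for the
// policy-driven values in the real code.
#include <cmath>
#include <cstdio>

double bessel_j_small_z(double v, double z)
{
   double prefix = std::pow(z / 2, v) / std::tgamma(v + 1); // (z/2)^v / Gamma(v+1)
   double mult = -(z / 2) * (z / 2);                        // -z^2/4
   double term = 1, sum = 0;
   for (unsigned N = 1; std::fabs(term) > 1e-17 * std::fabs(sum); ++N)
   {
      sum += term;
      term *= mult / (N * (N + v)); // next term of sum_k (-z^2/4)^k / (k! (v+1)_k)
   }
   return prefix * sum;
}

int main()
{
   std::printf("%.15f\n", bessel_j_small_z(0.0, 1.0)); // ~0.765197686557967 = J_0(1)
}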
// template -inline T bessel_j_small_z_series(T v, T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T bessel_j_small_z_series(T v, T x, const Policy& pol) { BOOST_MATH_STD_USING T prefix; @@ -66,7 +65,7 @@ inline T bessel_j_small_z_series(T v, T x, const Policy& pol) return prefix; bessel_j_small_z_series_term s(v, x); - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_series_iterations(); T result = boost::math::tools::sum_series(s, boost::math::policies::get_epsilon(), max_iter); @@ -79,7 +78,7 @@ struct bessel_y_small_z_series_term_a { typedef T result_type; - bessel_y_small_z_series_term_a(T v_, T x) + BOOST_MATH_GPU_ENABLED bessel_y_small_z_series_term_a(T v_, T x) : N(0), v(v_) { BOOST_MATH_STD_USING @@ -87,7 +86,7 @@ struct bessel_y_small_z_series_term_a mult *= -mult; term = 1; } - T operator()() + BOOST_MATH_GPU_ENABLED T operator()() { BOOST_MATH_STD_USING T r = term; @@ -107,7 +106,7 @@ struct bessel_y_small_z_series_term_b { typedef T result_type; - bessel_y_small_z_series_term_b(T v_, T x) + BOOST_MATH_GPU_ENABLED bessel_y_small_z_series_term_b(T v_, T x) : N(0), v(v_) { BOOST_MATH_STD_USING @@ -115,7 +114,7 @@ struct bessel_y_small_z_series_term_b mult *= -mult; term = 1; } - T operator()() + BOOST_MATH_GPU_ENABLED T operator()() { T r = term; ++N; @@ -138,10 +137,10 @@ struct bessel_y_small_z_series_term_b // eps/2 * v^v(x/2)^-v > (x/2)^v or log(eps/2) > v log((x/2)^2/v) // template -inline T bessel_y_small_z_series(T v, T x, T* pscale, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T bessel_y_small_z_series(T v, T x, T* pscale, const Policy& pol) { BOOST_MATH_STD_USING - static const char* function = "bessel_y_small_z_series<%1%>(%1%,%1%)"; + constexpr auto function = "bessel_y_small_z_series<%1%>(%1%,%1%)"; T prefix; T gam; T p = log(x / 2); @@ -183,7 +182,7 @@ inline T bessel_y_small_z_series(T v, T x, T* pscale, const Policy& pol) prefix = -exp(prefix); } bessel_y_small_z_series_term_a s(v, x); - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_series_iterations(); *pscale = scale; T result = boost::math::tools::sum_series(s, boost::math::policies::get_epsilon(), max_iter); @@ -211,7 +210,7 @@ inline T bessel_y_small_z_series(T v, T x, T* pscale, const Policy& pol) } template -T bessel_yn_small_z(int n, T z, T* scale, const Policy& pol) +BOOST_MATH_GPU_ENABLED T bessel_yn_small_z(int n, T z, T* scale, const Policy& pol) { // // See http://functions.wolfram.com/Bessel-TypeFunctions/BesselY/06/01/04/01/02/ diff --git a/include/boost/math/special_functions/detail/bessel_jy_zero.hpp b/include/boost/math/special_functions/detail/bessel_jy_zero.hpp index cb1fc48d83..15671c0df7 100644 --- a/include/boost/math/special_functions/detail/bessel_jy_zero.hpp +++ b/include/boost/math/special_functions/detail/bessel_jy_zero.hpp @@ -18,19 +18,30 @@ #ifndef BOOST_MATH_BESSEL_JY_ZERO_2013_01_18_HPP_ #define BOOST_MATH_BESSEL_JY_ZERO_2013_01_18_HPP_ - #include + #include + #include + #include + #include + #include #include - #include #include #include + #ifndef BOOST_MATH_HAS_NVRTC + #include + #endif + + #ifdef BOOST_MATH_ENABLE_CUDA + # pragma nv_diag_suppress 20012 + #endif + namespace boost { namespace math { namespace detail { namespace bessel_zero { template - T equation_nist_10_21_19(const T& v, const T& a) + BOOST_MATH_GPU_ENABLED T equation_nist_10_21_19(const T& v, const T& a) { // Get the initial estimate of the m'th root of Jv or 
Yv. // This subroutine is used for the order m with m > 1. @@ -57,11 +68,11 @@ class equation_as_9_3_39_and_its_derivative { public: - explicit equation_as_9_3_39_and_its_derivative(const T& zt) : zeta(zt) { } + BOOST_MATH_GPU_ENABLED explicit equation_as_9_3_39_and_its_derivative(const T& zt) : zeta(zt) { } - equation_as_9_3_39_and_its_derivative(const equation_as_9_3_39_and_its_derivative&) = default; + BOOST_MATH_GPU_ENABLED equation_as_9_3_39_and_its_derivative(const equation_as_9_3_39_and_its_derivative&) = default; - boost::math::tuple operator()(const T& z) const + BOOST_MATH_GPU_ENABLED boost::math::tuple operator()(const T& z) const { BOOST_MATH_STD_USING // ADL of std names, needed for acos, sqrt. @@ -86,7 +97,7 @@ }; template - static T equation_as_9_5_26(const T& v, const T& ai_bi_root, const Policy& pol) + BOOST_MATH_GPU_ENABLED T equation_as_9_5_26(const T& v, const T& ai_bi_root, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names, needed for log, sqrt. @@ -132,9 +143,9 @@ // Select the maximum allowed iterations based on the number // of decimal digits in the numeric type T, being at least 12. - const auto iterations_allowed = static_cast((std::max)(12, my_digits10 * 2)); + const auto iterations_allowed = static_cast(BOOST_MATH_GPU_SAFE_MAX(12, my_digits10 * 2)); - std::uintmax_t iterations_used = iterations_allowed; + boost::math::uintmax_t iterations_used = iterations_allowed; // Calculate the root of z as a function of zeta. const T z = boost::math::tools::newton_raphson_iterate( @@ -142,7 +153,7 @@ z_estimate, range_zmin, range_zmax, - (std::min)(boost::math::tools::digits(), boost::math::tools::digits()), + BOOST_MATH_GPU_SAFE_MIN(boost::math::tools::digits(), boost::math::tools::digits()), iterations_used); static_cast(iterations_used); @@ -168,7 +179,7 @@ namespace cyl_bessel_j_zero_detail { template - T equation_nist_10_21_40_a(const T& v, const Policy& pol) + BOOST_MATH_GPU_ENABLED T equation_nist_10_21_40_a(const T& v, const Policy& pol) { const T v_pow_third(boost::math::cbrt(v, pol)); const T v_pow_minus_two_thirds(T(1) / (v_pow_third * v_pow_third)); @@ -185,13 +196,13 @@ class function_object_jv { public: - function_object_jv(const T& v, + BOOST_MATH_GPU_ENABLED function_object_jv(const T& v, const Policy& pol) : my_v(v), my_pol(pol) { } - function_object_jv(const function_object_jv&) = default; + BOOST_MATH_GPU_ENABLED function_object_jv(const function_object_jv&) = default; - T operator()(const T& x) const + BOOST_MATH_GPU_ENABLED T operator()(const T& x) const { return boost::math::cyl_bessel_j(my_v, x, my_pol); } @@ -206,15 +217,16 @@ class function_object_jv_and_jv_prime { public: - function_object_jv_and_jv_prime(const T& v, - const bool order_is_zero, - const Policy& pol) : my_v(v), + BOOST_MATH_GPU_ENABLED function_object_jv_and_jv_prime( + const T& v, + const bool order_is_zero, + const Policy& pol) : my_v(v), my_order_is_zero(order_is_zero), my_pol(pol) { } function_object_jv_and_jv_prime(const function_object_jv_and_jv_prime&) = default; - boost::math::tuple operator()(const T& x) const + BOOST_MATH_GPU_ENABLED boost::math::tuple operator()(const T& x) const { // Obtain Jv(x) and Jv'(x). 
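// [editor's note] Sketch of the pattern this functor feeds: a callable
// returning the (f, f') pair that boost::math::tools::newton_raphson_iterate
// consumes, with the derivative supplied by the identity
// J_v'(x) = J_{v-1}(x) - (v/x) J_v(x), which reduces to J_0' = -J_1 for
// the zero-order case flagged by `order_is_zero`.  This is a public-API
// stand-in, not the detail-layer code the PR touches:
#include <boost/math/special_functions/bessel.hpp>
#include <boost/math/tools/roots.hpp>
#include <cstdint>
#include <utility>

int main()
{
   auto f = [](double x)
   {
      double j0 = boost::math::cyl_bessel_j(0, x);
      double j1 = boost::math::cyl_bessel_j(1, x);
      return std::make_pair(j0, -j1); // (J_0(x), J_0'(x)) since J_0' = -J_1
   };
   std::uintmax_t max_iter = 20;
   double root = boost::math::tools::newton_raphson_iterate(
      f, /*guess*/ 2.4, /*min*/ 2.0, /*max*/ 3.0, /*digits*/ 50, max_iter);
   // root ~= 2.404825557695773, the first zero of J_0.
   (void)root;
}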
// Chris's original code called the Bessel function implementation layer direct, @@ -246,10 +258,10 @@ const function_object_jv_and_jv_prime& operator=(const function_object_jv_and_jv_prime&) = delete; }; - template bool my_bisection_unreachable_tolerance(const T&, const T&) { return false; } + template BOOST_MATH_GPU_ENABLED bool my_bisection_unreachable_tolerance(const T&, const T&) { return false; } template - T initial_guess(const T& v, const int m, const Policy& pol) + BOOST_MATH_GPU_ENABLED T initial_guess(const T& v, const int m, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names, needed for floor. @@ -325,7 +337,7 @@ } // Perform several steps of bisection iteration to refine the guess. - std::uintmax_t number_of_iterations(12U); + boost::math::uintmax_t number_of_iterations(12U); // Do the bisection iteration. const boost::math::tuple guess_pair = @@ -390,7 +402,7 @@ namespace cyl_neumann_zero_detail { template - T equation_nist_10_21_40_b(const T& v, const Policy& pol) + BOOST_MATH_GPU_ENABLED T equation_nist_10_21_40_b(const T& v, const Policy& pol) { const T v_pow_third(boost::math::cbrt(v, pol)); const T v_pow_minus_two_thirds(T(1) / (v_pow_third * v_pow_third)); @@ -407,13 +419,13 @@ class function_object_yv { public: - function_object_yv(const T& v, - const Policy& pol) : my_v(v), - my_pol(pol) { } + BOOST_MATH_GPU_ENABLED function_object_yv(const T& v, + const Policy& pol) : my_v(v), + my_pol(pol) { } - function_object_yv(const function_object_yv&) = default; + BOOST_MATH_GPU_ENABLED function_object_yv(const function_object_yv&) = default; - T operator()(const T& x) const + BOOST_MATH_GPU_ENABLED T operator()(const T& x) const { return boost::math::cyl_neumann(my_v, x, my_pol); } @@ -428,13 +440,13 @@ class function_object_yv_and_yv_prime { public: - function_object_yv_and_yv_prime(const T& v, - const Policy& pol) : my_v(v), - my_pol(pol) { } + BOOST_MATH_GPU_ENABLED function_object_yv_and_yv_prime(const T& v, + const Policy& pol) : my_v(v), + my_pol(pol) { } - function_object_yv_and_yv_prime(const function_object_yv_and_yv_prime&) = default; + BOOST_MATH_GPU_ENABLED function_object_yv_and_yv_prime(const function_object_yv_and_yv_prime&) = default; - boost::math::tuple operator()(const T& x) const + BOOST_MATH_GPU_ENABLED boost::math::tuple operator()(const T& x) const { const T half_epsilon(boost::math::tools::epsilon() / 2U); @@ -469,10 +481,10 @@ const function_object_yv_and_yv_prime& operator=(const function_object_yv_and_yv_prime&) = delete; }; - template bool my_bisection_unreachable_tolerance(const T&, const T&) { return false; } + template BOOST_MATH_GPU_ENABLED bool my_bisection_unreachable_tolerance(const T&, const T&) { return false; } template - T initial_guess(const T& v, const int m, const Policy& pol) + BOOST_MATH_GPU_ENABLED T initial_guess(const T& v, const int m, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names, needed for floor. @@ -560,7 +572,7 @@ } // Perform several steps of bisection iteration to refine the guess. - std::uintmax_t number_of_iterations(12U); + boost::math::uintmax_t number_of_iterations(12U); // Do the bisection iteration. 
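// [editor's note] What the fixed-iteration refinement below amounts to,
// expressed against the public API: my_bisection_unreachable_tolerance
// always returns false, so bisect() runs exactly number_of_iterations
// halvings and the midpoint of the final bracket becomes the polished
// initial guess.  Illustrative stand-in:
#include <boost/math/special_functions/bessel.hpp>
#include <boost/math/tools/roots.hpp>
#include <cstdint>
#include <utility>

int main()
{
   auto f = [](double x) { return boost::math::cyl_bessel_j(0, x); };
   auto unreachable_tol = [](double, double) { return false; }; // never "converged"
   std::uintmax_t number_of_iterations = 12;
   std::pair<double, double> guess_pair =
      boost::math::tools::bisect(f, 2.0, 3.0, unreachable_tol, number_of_iterations);
   double guess = (guess_pair.first + guess_pair.second) / 2; // ~ first zero of J_0
   (void)guess;
}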
const boost::math::tuple guess_pair = @@ -624,4 +636,8 @@ } // namespace bessel_zero } } } // namespace boost::math::detail + #ifdef BOOST_MATH_ENABLE_CUDA + # pragma nv_diag_default 20012 + #endif + #endif // BOOST_MATH_BESSEL_JY_ZERO_2013_01_18_HPP_ diff --git a/include/boost/math/special_functions/detail/bessel_k0.hpp b/include/boost/math/special_functions/detail/bessel_k0.hpp index f29ffa75c4..bab202b6cd 100644 --- a/include/boost/math/special_functions/detail/bessel_k0.hpp +++ b/include/boost/math/special_functions/detail/bessel_k0.hpp @@ -13,10 +13,14 @@ #pragma warning(disable:4702) // Unreachable code (release mode only warning) #endif +#include +#include +#include +#include #include #include -#include #include +#include #if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128) // @@ -44,35 +48,37 @@ namespace boost { namespace math { namespace detail{ template -T bessel_k0(const T& x); +BOOST_MATH_GPU_ENABLED T bessel_k0(const T& x); template struct bessel_k0_initializer { struct init { - init() + BOOST_MATH_GPU_ENABLED init() { do_init(tag()); } - static void do_init(const std::integral_constant&) + BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant&) { bessel_k0(T(0.5)); bessel_k0(T(1.5)); } - static void do_init(const std::integral_constant&) + BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant&) { bessel_k0(T(0.5)); bessel_k0(T(1.5)); } template - static void do_init(const U&){} - void force_instantiate()const{} + BOOST_MATH_GPU_ENABLED static void do_init(const U&){} + BOOST_MATH_GPU_ENABLED void force_instantiate()const{} }; - static const init initializer; - static void force_instantiate() + BOOST_MATH_STATIC const init initializer; + BOOST_MATH_GPU_ENABLED static void force_instantiate() { + #ifndef BOOST_MATH_HAS_GPU_SUPPORT initializer.force_instantiate(); + #endif } }; @@ -81,14 +87,14 @@ const typename bessel_k0_initializer::init bessel_k0_initializer template -T bessel_k0_imp(const T&, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_k0_imp(const T&, const boost::math::integral_constant&) { BOOST_MATH_ASSERT(0); return 0; } template -T bessel_k0_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_k0_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x <= 1) @@ -97,14 +103,14 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Expected Error Term : -2.358e-09 // Maximum Relative Change in Control Points : 9.552e-02 // Max Error found at float precision = Poly : 4.448220e-08 - static const T Y = 1.137250900268554688f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1.137250900268554688f; + BOOST_MATH_STATIC const T P[] = { -1.372508979104259711e-01f, 2.622545986273687617e-01f, 5.047103728247919836e-03f }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { 1.000000000000000000e+00f, -8.928694018000029415e-02f, @@ -117,7 +123,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Expected Error Term : -1.343e-09 // Maximum Relative Change in Control Points : 2.405e-02 // Max Error found at float precision = Poly : 1.354814e-07 - static const T P2[] = { + BOOST_MATH_STATIC const T P2[] = { 1.159315158e-01f, 2.789828686e-01f, 2.524902861e-02f, @@ -133,14 +139,14 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Maximum Relative Change in Control Points : 9.064e-02 // Max Error found at float precision = Poly : 5.065020e-08 - static const T P[] = + BOOST_MATH_STATIC const T P[] = { 
2.533141220e-01f, 5.221502603e-01f, 6.380180669e-02f, -5.934976547e-02f }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { 1.000000000e+00f, 2.679722431e+00f, @@ -158,7 +164,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) } template -T bessel_k0_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_k0_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x <= 1) @@ -167,8 +173,8 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Expected Error Term : -6.077e-17 // Maximum Relative Change in Control Points : 7.797e-02 // Max Error found at double precision = Poly : 1.003156e-16 - static const T Y = 1.137250900268554688; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1.137250900268554688; + BOOST_MATH_STATIC const T P[] = { -1.372509002685546267e-01, 2.574916117833312855e-01, @@ -176,7 +182,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) 5.445476986653926759e-04, 7.125159422136622118e-06 }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { 1.000000000000000000e+00, -5.458333438017788530e-02, @@ -191,7 +197,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Expected Error Term : 3.392e-18 // Maximum Relative Change in Control Points : 2.041e-02 // Max Error found at double precision = Poly : 2.513112e-16 - static const T P2[] = + BOOST_MATH_STATIC const T P2[] = { 1.159315156584124484e-01, 2.789828789146031732e-01, @@ -212,8 +218,8 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Maximum Relative Change in Control Points : 2.757e-01 // Max Error found at double precision = Poly : 1.001560e-16 - static const T Y = 1; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1; + BOOST_MATH_STATIC const T P[] = { 2.533141373155002416e-01, 3.628342133984595192e+00, @@ -225,7 +231,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) -1.414237994269995877e+00, -9.369168119754924625e-02 }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { 1.000000000000000000e+00, 1.494194694879908328e+01, @@ -248,7 +254,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) } template -T bessel_k0_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_k0_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x <= 1) @@ -257,8 +263,8 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Expected Error Term : 2.180e-22 // Maximum Relative Change in Control Points : 2.943e-01 // Max Error found at float80 precision = Poly : 3.923207e-20 - static const T Y = 1.137250900268554687500e+00; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1.137250900268554687500e+00; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -1.372509002685546875002e-01), BOOST_MATH_BIG_CONSTANT(T, 64, 2.566481981037407600436e-01), @@ -267,7 +273,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) BOOST_MATH_BIG_CONSTANT(T, 64, 1.213747930378196492543e-05), BOOST_MATH_BIG_CONSTANT(T, 64, 9.423709328020389560844e-08) }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 64, -4.843828412587773008342e-02), @@ -284,7 +290,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Expected Error Term : -2.434e-21 // Maximum Relative Change in Control Points : 2.459e-02 // Max Error found at float80 precision = Poly : 1.482487e-19 - static const T P2[] = + BOOST_MATH_STATIC 
const T P2[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.159315156584124488110e-01), BOOST_MATH_BIG_CONSTANT(T, 64, 2.764832791416047889734e-01), @@ -292,7 +298,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) BOOST_MATH_BIG_CONSTANT(T, 64, 3.660777862036966089410e-04), BOOST_MATH_BIG_CONSTANT(T, 64, 2.094942446930673386849e-06) }; - static const T Q2[] = + BOOST_MATH_STATIC const T Q2[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 64, -2.156100313881251616320e-02), @@ -308,8 +314,8 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Expected Error Term : 2.236e-21 // Maximum Relative Change in Control Points : 3.021e-01 //Max Error found at float80 precision = Poly : 8.727378e-20 - static const T Y = 1; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 2.533141373155002512056e-01), BOOST_MATH_BIG_CONSTANT(T, 64, 5.417942070721928652715e+00), @@ -323,7 +329,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) BOOST_MATH_BIG_CONSTANT(T, 64, -4.059789241612946683713e+00), BOOST_MATH_BIG_CONSTANT(T, 64, -1.612783121537333908889e-01) }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 64, 2.200669254769325861404e+01), @@ -348,7 +354,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) } template -T bessel_k0_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_k0_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x <= 1) @@ -357,8 +363,8 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Expected Error Term : 5.682e-37 // Maximum Relative Change in Control Points : 6.094e-04 // Max Error found at float128 precision = Poly : 5.338213e-35 - static const T Y = 1.137250900268554687500000000000000000e+00f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1.137250900268554687500000000000000000e+00f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, -1.372509002685546875000000000000000006e-01), BOOST_MATH_BIG_CONSTANT(T, 113, 2.556212905071072782462974351698081303e-01), @@ -369,7 +375,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) BOOST_MATH_BIG_CONSTANT(T, 113, 1.752489221949580551692915881999762125e-09), BOOST_MATH_BIG_CONSTANT(T, 113, 5.243010555737173524710512824955368526e-12) }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 1.000000000000000000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 113, -4.095631064064621099785696980653193721e-02), @@ -387,7 +393,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Expected Error Term : 5.105e-38 // Maximum Relative Change in Control Points : 9.734e-03 // Max Error found at float128 precision = Poly : 1.688806e-34 - static const T P2[] = + BOOST_MATH_STATIC const T P2[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 1.159315156584124488107200313757741370e-01), BOOST_MATH_BIG_CONSTANT(T, 113, 2.789828789146031122026800078439435369e-01), @@ -413,8 +419,8 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Expected Error Term : 4.917e-40 // Maximum Relative Change in Control Points : 3.385e-01 // Max Error found at float128 precision = Poly : 1.567573e-34 - static const T Y = 1; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 
2.533141373155002512078826424055226265e-01), BOOST_MATH_BIG_CONSTANT(T, 113, 2.001949740768235770078339977110749204e+01), @@ -439,7 +445,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) BOOST_MATH_BIG_CONSTANT(T, 113, -4.201632288615609937883545928660649813e+03), BOOST_MATH_BIG_CONSTANT(T, 113, -3.690820607338480548346746717311811406e+01) }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 1.000000000000000000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 113, 7.964877874035741452203497983642653107e+01), @@ -475,33 +481,33 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) } template -T bessel_k0_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_k0_imp(const T& x, const boost::math::integral_constant&) { if(boost::math::tools::digits() <= 24) - return bessel_k0_imp(x, std::integral_constant()); + return bessel_k0_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 53) - return bessel_k0_imp(x, std::integral_constant()); + return bessel_k0_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 64) - return bessel_k0_imp(x, std::integral_constant()); + return bessel_k0_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 113) - return bessel_k0_imp(x, std::integral_constant()); + return bessel_k0_imp(x, boost::math::integral_constant()); BOOST_MATH_ASSERT(0); return 0; } template -inline T bessel_k0(const T& x) +BOOST_MATH_GPU_ENABLED inline T bessel_k0(const T& x) { - typedef std::integral_constant::digits == 0) || (std::numeric_limits::radix != 2)) ? + typedef boost::math::integral_constant::digits == 0) || (boost::math::numeric_limits::radix != 2)) ? 0 : - std::numeric_limits::digits <= 24 ? + boost::math::numeric_limits::digits <= 24 ? 24 : - std::numeric_limits::digits <= 53 ? + boost::math::numeric_limits::digits <= 53 ? 53 : - std::numeric_limits::digits <= 64 ? + boost::math::numeric_limits::digits <= 64 ? 64 : - std::numeric_limits::digits <= 113 ? + boost::math::numeric_limits::digits <= 113 ? 
113 : -1 > tag_type; diff --git a/include/boost/math/special_functions/detail/bessel_k1.hpp b/include/boost/math/special_functions/detail/bessel_k1.hpp index bd37f90215..49846dc8c5 100644 --- a/include/boost/math/special_functions/detail/bessel_k1.hpp +++ b/include/boost/math/special_functions/detail/bessel_k1.hpp @@ -13,6 +13,10 @@ #pragma warning(disable:4702) // Unreachable code (release mode only warning) #endif +#include +#include +#include +#include #include #include #include @@ -44,36 +48,38 @@ namespace boost { namespace math { namespace detail{ template - T bessel_k1(const T&); + BOOST_MATH_GPU_ENABLED T bessel_k1(const T&); template struct bessel_k1_initializer { struct init { - init() + BOOST_MATH_GPU_ENABLED init() { do_init(tag()); } - static void do_init(const std::integral_constant&) + BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant&) { bessel_k1(T(0.5)); bessel_k1(T(2)); bessel_k1(T(6)); } - static void do_init(const std::integral_constant&) + BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant&) { bessel_k1(T(0.5)); bessel_k1(T(6)); } template - static void do_init(const U&) {} - void force_instantiate()const {} + BOOST_MATH_GPU_ENABLED static void do_init(const U&) {} + BOOST_MATH_GPU_ENABLED void force_instantiate()const {} }; - static const init initializer; - static void force_instantiate() + BOOST_MATH_STATIC const init initializer; + BOOST_MATH_GPU_ENABLED static void force_instantiate() { + #ifndef BOOST_MATH_HAS_GPU_SUPPORT initializer.force_instantiate(); + #endif } }; @@ -82,14 +88,14 @@ namespace boost { namespace math { namespace detail{ template - inline T bessel_k1_imp(const T&, const std::integral_constant&) + inline BOOST_MATH_GPU_ENABLED T bessel_k1_imp(const T&, const boost::math::integral_constant&) { BOOST_MATH_ASSERT(0); return 0; } template - T bessel_k1_imp(const T& x, const std::integral_constant&) + BOOST_MATH_GPU_ENABLED T bessel_k1_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x <= 1) @@ -98,14 +104,14 @@ namespace boost { namespace math { namespace detail{ // Expected Error Term : -3.053e-12 // Maximum Relative Change in Control Points : 4.927e-02 // Max Error found at float precision = Poly : 7.918347e-10 - static const T Y = 8.695471287e-02f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 8.695471287e-02f; + BOOST_MATH_STATIC const T P[] = { -3.621379531e-03f, 7.131781976e-03f, -1.535278300e-05f }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { 1.000000000e+00f, -5.173102701e-02f, @@ -118,7 +124,7 @@ namespace boost { namespace math { namespace detail{ // Maximum Deviation Found: 3.556e-08 // Expected Error Term : -3.541e-08 // Maximum Relative Change in Control Points : 8.203e-02 - static const T P2[] = + BOOST_MATH_STATIC const T P2[] = { -3.079657469e-01f, -8.537108913e-02f, @@ -134,15 +140,15 @@ namespace boost { namespace math { namespace detail{ // Expected Error Term : -3.227e-08 // Maximum Relative Change in Control Points : 9.917e-02 // Max Error found at float precision = Poly : 6.084411e-08 - static const T Y = 1.450342178f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1.450342178f; + BOOST_MATH_STATIC const T P[] = { -1.970280088e-01f, 2.188747807e-02f, 7.270394756e-01f, 2.490678196e-01f }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { 1.000000000e+00f, 2.274292882e+00f, @@ -160,7 +166,7 @@ namespace boost { namespace math { namespace detail{ } template - T bessel_k1_imp(const T& x, const 
std::integral_constant&) + BOOST_MATH_GPU_ENABLED T bessel_k1_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x <= 1) @@ -169,15 +175,15 @@ namespace boost { namespace math { namespace detail{ // Expected Error Term : 1.921e-17 // Maximum Relative Change in Control Points : 5.287e-03 // Max Error found at double precision = Poly : 2.004747e-17 - static const T Y = 8.69547128677368164e-02f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 8.69547128677368164e-02f; + BOOST_MATH_STATIC const T P[] = { -3.62137953440350228e-03, 7.11842087490330300e-03, 1.00302560256614306e-05, 1.77231085381040811e-06 }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { 1.00000000000000000e+00, -4.80414794429043831e-02, @@ -193,14 +199,14 @@ namespace boost { namespace math { namespace detail{ // Maximum Relative Change in Control Points : 3.103e-04 // Max Error found at double precision = Poly : 1.246698e-16 - static const T P2[] = + BOOST_MATH_STATIC const T P2[] = { -3.07965757829206184e-01, -7.80929703673074907e-02, -2.70619343754051620e-03, -2.49549522229072008e-05 }; - static const T Q2[] = + BOOST_MATH_STATIC const T Q2[] = { 1.00000000000000000e+00, -2.36316836412163098e-02, @@ -217,8 +223,8 @@ namespace boost { namespace math { namespace detail{ // Maximum Relative Change in Control Points : 2.786e-01 // Max Error found at double precision = Poly : 1.258798e-16 - static const T Y = 1.45034217834472656f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1.45034217834472656f; + BOOST_MATH_STATIC const T P[] = { -1.97028041029226295e-01, -2.32408961548087617e+00, @@ -230,7 +236,7 @@ namespace boost { namespace math { namespace detail{ 6.62582288933739787e+00, 3.08851840645286691e-01 }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { 1.00000000000000000e+00, 1.41811409298826118e+01, @@ -253,7 +259,7 @@ namespace boost { namespace math { namespace detail{ } template - T bessel_k1_imp(const T& x, const std::integral_constant&) + BOOST_MATH_GPU_ENABLED T bessel_k1_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x <= 1) @@ -262,8 +268,8 @@ namespace boost { namespace math { namespace detail{ // Expected Error Term : -5.548e-23 // Maximum Relative Change in Control Points : 2.002e-03 // Max Error found at float80 precision = Poly : 9.352785e-22 - static const T Y = 8.695471286773681640625e-02f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 8.695471286773681640625e-02f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -3.621379534403483072861e-03), BOOST_MATH_BIG_CONSTANT(T, 64, 7.102135866103952705932e-03), @@ -271,7 +277,7 @@ namespace boost { namespace math { namespace detail{ BOOST_MATH_BIG_CONSTANT(T, 64, 2.537484002571894870830e-06), BOOST_MATH_BIG_CONSTANT(T, 64, 6.603228256820000135990e-09) }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 64, -4.354457194045068370363e-02), @@ -287,7 +293,7 @@ namespace boost { namespace math { namespace detail{ // Expected Error Term : 1.995e-23 // Maximum Relative Change in Control Points : 8.174e-04 // Max Error found at float80 precision = Poly : 4.137325e-20 - static const T P2[] = + BOOST_MATH_STATIC const T P2[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -3.079657578292062244054e-01), BOOST_MATH_BIG_CONSTANT(T, 64, -7.963049154965966503231e-02), @@ -295,7 +301,7 @@ namespace boost { namespace math { namespace detail{ BOOST_MATH_BIG_CONSTANT(T, 64, 
-4.023052834702215699504e-05), BOOST_MATH_BIG_CONSTANT(T, 64, -1.719459155018493821839e-07) }; - static const T Q2[] = + BOOST_MATH_STATIC const T Q2[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 64, -1.863917670410152669768e-02), @@ -312,8 +318,8 @@ namespace boost { namespace math { namespace detail{ // Expected Error Term : -3.302e-21 // Maximum Relative Change in Control Points : 3.432e-01 // Max Error found at float80 precision = Poly : 1.083755e-19 - static const T Y = 1.450342178344726562500e+00f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1.450342178344726562500e+00f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -1.970280410292263112917e-01), BOOST_MATH_BIG_CONSTANT(T, 64, -4.058564803062959169322e+00), @@ -328,7 +334,7 @@ namespace boost { namespace math { namespace detail{ BOOST_MATH_BIG_CONSTANT(T, 64, 4.319614662598089438939e+00), BOOST_MATH_BIG_CONSTANT(T, 64, 3.710715864316521856193e-02) }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 64, 2.298433045824439052398e+01), @@ -353,7 +359,7 @@ namespace boost { namespace math { namespace detail{ } template - T bessel_k1_imp(const T& x, const std::integral_constant&) + BOOST_MATH_GPU_ENABLED T bessel_k1_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x <= 1) @@ -362,8 +368,8 @@ namespace boost { namespace math { namespace detail{ // Expected Error Term : -7.119e-35 // Maximum Relative Change in Control Points : 1.207e-03 // Max Error found at float128 precision = Poly : 7.143688e-35 - static const T Y = 8.695471286773681640625000000000000000e-02f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 8.695471286773681640625000000000000000e-02f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, -3.621379534403483072916666666666595475e-03), BOOST_MATH_BIG_CONSTANT(T, 113, 7.074117676930975433219826471336547627e-03), @@ -373,7 +379,7 @@ namespace boost { namespace math { namespace detail{ BOOST_MATH_BIG_CONSTANT(T, 113, 2.347140307321161346703214099534250263e-10), BOOST_MATH_BIG_CONSTANT(T, 113, 5.569608494081482873946791086435679661e-13) }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 1.000000000000000000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 113, -3.580768910152105375615558920428350204e-02), @@ -391,7 +397,7 @@ namespace boost { namespace math { namespace detail{ // Expected Error Term : 4.473e-37 // Maximum Relative Change in Control Points : 8.550e-04 // Max Error found at float128 precision = Poly : 8.167701e-35 - static const T P2[] = + BOOST_MATH_STATIC const T P2[] = { BOOST_MATH_BIG_CONSTANT(T, 113, -3.079657578292062244053600156878870690e-01), BOOST_MATH_BIG_CONSTANT(T, 113, -8.133183745732467770755578848987414875e-02), @@ -401,7 +407,7 @@ namespace boost { namespace math { namespace detail{ BOOST_MATH_BIG_CONSTANT(T, 113, -1.632502325880313239698965376754406011e-09), BOOST_MATH_BIG_CONSTANT(T, 113, -2.311973065898784812266544485665624227e-12) }; - static const T Q2[] = + BOOST_MATH_STATIC const T Q2[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 1.000000000000000000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 113, -1.311471216733781016657962995723287450e-02), @@ -418,8 +424,8 @@ namespace boost { namespace math { namespace detail{ { // Max error in interpolated form: 5.307e-37 // Max Error found at float128 precision = Poly: 7.087862e-35 
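The P/Q and P2/Q2 tables above and below are numerator/denominator coefficients of rational minimax approximations; the implementation combines them along the lines of `Y + evaluate_polynomial(P, t) / evaluate_polynomial(Q, t)`. For reference, a minimal Horner-style evaluator in the spirit of `boost::math::tools::evaluate_polynomial` — illustrative only, not the library's code, which also carries unrolled variants:

```cpp
#include <cstddef>
#include <iostream>

// Horner evaluation: c[0] + c[1]*t + ... + c[N-1]*t^(N-1), matching the
// ascending coefficient order of the P/Q tables in this diff.
template <class T, std::size_t N>
T evaluate_polynomial(const T (&c)[N], T t)
{
    T result = c[N - 1];
    for (std::size_t i = N - 1; i > 0; --i)
        result = result * t + c[i - 1];
    return result;
}

int main()
{
    static const double P[] = { 1.0, 2.0, 3.0 }; // 1 + 2t + 3t^2
    static const double Q[] = { 1.0, 0.5 };      // 1 + t/2
    const double t = 0.25;
    std::cout << evaluate_polynomial(P, t) / evaluate_polynomial(Q, t) << '\n'; // 1.5
}
```

A fixed-trip-count loop over `BOOST_MATH_STATIC` tables like this needs no dynamic initialisation or host-only machinery, which is why these approximations port to device code so directly.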
- static const T Y = 1.5023040771484375f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1.5023040771484375f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, -2.489899398329369710528254347931380044e-01), BOOST_MATH_BIG_CONSTANT(T, 113, -6.819080211203854781858815596508456873e+00), @@ -438,7 +444,7 @@ namespace boost { namespace math { namespace detail{ BOOST_MATH_BIG_CONSTANT(T, 113, 1.039705646510167437971862966128055524e+00), BOOST_MATH_BIG_CONSTANT(T, 113, 1.008418100718254816100425022904039530e-02) }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 1.000000000000000000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 113, 2.927456835239137986889227412815459529e+01), @@ -465,8 +471,8 @@ namespace boost { namespace math { namespace detail{ // Expected Error Term : -6.565e-40 // Maximum Relative Change in Control Points : 1.880e-01 // Max Error found at float128 precision = Poly : 2.943572e-35 - static const T Y = 1.308816909790039062500000000000000000f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1.308816909790039062500000000000000000f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, -5.550277247453881129211735759447737350e-02), BOOST_MATH_BIG_CONSTANT(T, 113, -3.485883080219574328217554864956175929e+00), @@ -486,7 +492,7 @@ namespace boost { namespace math { namespace detail{ BOOST_MATH_BIG_CONSTANT(T, 113, 8.981057433937398731355768088809437625e+05), BOOST_MATH_BIG_CONSTANT(T, 113, 2.519440069856232098711793483639792952e+04) }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 1.000000000000000000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 113, 7.127348248283623146544565916604103560e+01), @@ -517,33 +523,33 @@ namespace boost { namespace math { namespace detail{ } template - T bessel_k1_imp(const T& x, const std::integral_constant&) + BOOST_MATH_GPU_ENABLED T bessel_k1_imp(const T& x, const boost::math::integral_constant&) { if(boost::math::tools::digits() <= 24) - return bessel_k1_imp(x, std::integral_constant()); + return bessel_k1_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 53) - return bessel_k1_imp(x, std::integral_constant()); + return bessel_k1_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 64) - return bessel_k1_imp(x, std::integral_constant()); + return bessel_k1_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 113) - return bessel_k1_imp(x, std::integral_constant()); + return bessel_k1_imp(x, boost::math::integral_constant()); BOOST_MATH_ASSERT(0); return 0; } - template - inline T bessel_k1(const T& x) + template + inline BOOST_MATH_GPU_ENABLED T bessel_k1(const T& x) { - typedef std::integral_constant::digits == 0) || (std::numeric_limits::radix != 2)) ? + typedef boost::math::integral_constant::digits == 0) || (boost::math::numeric_limits::radix != 2)) ? 0 : - std::numeric_limits::digits <= 24 ? + boost::math::numeric_limits::digits <= 24 ? 24 : - std::numeric_limits::digits <= 53 ? + boost::math::numeric_limits::digits <= 53 ? 53 : - std::numeric_limits::digits <= 64 ? + boost::math::numeric_limits::digits <= 64 ? 64 : - std::numeric_limits::digits <= 113 ? + boost::math::numeric_limits::digits <= 113 ? 
113 : -1 > tag_type; diff --git a/include/boost/math/special_functions/detail/bessel_kn.hpp b/include/boost/math/special_functions/detail/bessel_kn.hpp index d0ddcd0db4..41becc8aa9 100644 --- a/include/boost/math/special_functions/detail/bessel_kn.hpp +++ b/include/boost/math/special_functions/detail/bessel_kn.hpp @@ -10,8 +10,12 @@ #pragma once #endif +#include +#include +#include #include #include +#include #include // Modified Bessel function of the second kind of integer order @@ -20,14 +24,14 @@ namespace boost { namespace math { namespace detail{ template -T bessel_kn(int n, T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED T bessel_kn(int n, T x, const Policy& pol) { BOOST_MATH_STD_USING T value, current, prev; using namespace boost::math::tools; - static const char* function = "boost::math::bessel_kn<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::bessel_kn<%1%>(%1%,%1%)"; if (x < 0) { diff --git a/include/boost/math/special_functions/detail/bessel_y0.hpp b/include/boost/math/special_functions/detail/bessel_y0.hpp index 1679820d19..f1aea6acbd 100644 --- a/include/boost/math/special_functions/detail/bessel_y0.hpp +++ b/include/boost/math/special_functions/detail/bessel_y0.hpp @@ -12,6 +12,7 @@ #pragma warning(disable:4702) // Unreachable code (release mode only warning) #endif +#include #include #include #include @@ -36,12 +37,12 @@ namespace boost { namespace math { namespace detail{ template -T bessel_y0(T x, const Policy&); +BOOST_MATH_GPU_ENABLED T bessel_y0(T x, const Policy&); template -T bessel_y0(T x, const Policy&) +BOOST_MATH_GPU_ENABLED T bessel_y0(T x, const Policy&) { - static const T P1[] = { + BOOST_MATH_STATIC const T P1[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0723538782003176831e+11)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -8.3716255451260504098e+09)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.0422274357376619816e+08)), @@ -49,7 +50,7 @@ T bessel_y0(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0102532948020907590e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.8402381979244993524e+01)), }; - static const T Q1[] = { + BOOST_MATH_STATIC const T Q1[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.8873865738997033405e+11)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.1617187777290363573e+09)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.5662956624278251596e+07)), @@ -57,7 +58,7 @@ T bessel_y0(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 6.6475986689240190091e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), }; - static const T P2[] = { + BOOST_MATH_STATIC const T P2[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -2.2213976967566192242e+13)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -5.5107435206722644429e+11)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.3600098638603061642e+10)), @@ -66,7 +67,7 @@ T bessel_y0(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.4566865832663635920e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.7427031242901594547e+01)), }; - static const T Q2[] = { + BOOST_MATH_STATIC const T Q2[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.3386146580707264428e+14)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.4266824419412347550e+12)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.4015103849971240096e+10)), @@ -75,7 +76,7 @@ T bessel_y0(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.3030857612070288823e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), }; - static const T P3[] = { + BOOST_MATH_STATIC const T P3[] = { 
static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -8.0728726905150210443e+15)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 6.7016641869173237784e+14)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.2829912364088687306e+11)), @@ -85,7 +86,7 @@ T bessel_y0(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.1363534169313901632e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.7439661319197499338e+01)), }; - static const T Q3[] = { + BOOST_MATH_STATIC const T Q3[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.4563724628846457519e+17)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.9272425569640309819e+15)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.2598377924042897629e+13)), @@ -95,7 +96,7 @@ T bessel_y0(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.7903362168128450017e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), }; - static const T PC[] = { + BOOST_MATH_STATIC const T PC[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.2779090197304684302e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.1345386639580765797e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.1170523380864944322e+04)), @@ -103,7 +104,7 @@ T bessel_y0(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.5376201909008354296e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.8961548424210455236e-01)), }; - static const T QC[] = { + BOOST_MATH_STATIC const T QC[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.2779090197304684318e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.1370412495510416640e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.1215350561880115730e+04)), @@ -111,7 +112,7 @@ T bessel_y0(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.5711159858080893649e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), }; - static const T PS[] = { + BOOST_MATH_STATIC const T PS[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -8.9226600200800094098e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.8591953644342993800e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.1183429920482737611e+02)), @@ -119,7 +120,7 @@ T bessel_y0(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.2441026745835638459e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -8.8033303048680751817e-03)), }; - static const T QS[] = { + BOOST_MATH_STATIC const T QS[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.7105024128512061905e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.1951131543434613647e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 7.2642780169211018836e+03)), @@ -127,7 +128,7 @@ T bessel_y0(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 9.0593769594993125859e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), }; - static const T x1 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.9357696627916752158e-01)), + BOOST_MATH_STATIC const T x1 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.9357696627916752158e-01)), x2 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.9576784193148578684e+00)), x3 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 7.0860510603017726976e+00)), x11 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.280e+02)), diff --git a/include/boost/math/special_functions/detail/bessel_y1.hpp b/include/boost/math/special_functions/detail/bessel_y1.hpp index 3ac696bb5c..0f0dbdf3bb 100644 --- a/include/boost/math/special_functions/detail/bessel_y1.hpp +++ b/include/boost/math/special_functions/detail/bessel_y1.hpp @@ -12,6 +12,7 @@ #pragma warning(disable:4702) // Unreachable code (release mode only warning) 
#endif +#include #include #include #include @@ -36,12 +37,12 @@ namespace boost { namespace math { namespace detail{ template -T bessel_y1(T x, const Policy&); +BOOST_MATH_GPU_ENABLED T bessel_y1(T x, const Policy&); template -T bessel_y1(T x, const Policy&) +BOOST_MATH_GPU_ENABLED T bessel_y1(T x, const Policy&) { - static const T P1[] = { + BOOST_MATH_STATIC const T P1[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.0535726612579544093e+13)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.4708611716525426053e+12)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -3.7595974497819597599e+11)), @@ -50,7 +51,7 @@ T bessel_y1(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.2157953222280260820e+05)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -3.1714424660046133456e+02)), }; - static const T Q1[] = { + BOOST_MATH_STATIC const T Q1[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.0737873921079286084e+14)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.1272286200406461981e+12)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.7800352738690585613e+10)), @@ -59,7 +60,7 @@ T bessel_y1(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.2079908168393867438e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), }; - static const T P2[] = { + BOOST_MATH_STATIC const T P2[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.1514276357909013326e+19)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -5.6808094574724204577e+18)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -2.3638408497043134724e+16)), @@ -70,7 +71,7 @@ T bessel_y1(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.9153806858264202986e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.2337180442012953128e+03)), }; - static const T Q2[] = { + BOOST_MATH_STATIC const T Q2[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.3321844313316185697e+20)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.6968198822857178911e+18)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.0837179548112881950e+16)), @@ -81,7 +82,7 @@ T bessel_y1(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.2855164849321609336e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), }; - static const T PC[] = { + BOOST_MATH_STATIC const T PC[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -4.4357578167941278571e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -9.9422465050776411957e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -6.6033732483649391093e+06)), @@ -90,7 +91,7 @@ T bessel_y1(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.6116166443246101165e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.0)), }; - static const T QC[] = { + BOOST_MATH_STATIC const T QC[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -4.4357578167941278568e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -9.9341243899345856590e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -6.5853394797230870728e+06)), @@ -99,7 +100,7 @@ T bessel_y1(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.4550094401904961825e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), }; - static const T PS[] = { + BOOST_MATH_STATIC const T PS[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.3220913409857223519e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.5145160675335701966e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 6.6178836581270835179e+04)), @@ -108,7 +109,7 @@ T bessel_y1(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.5265133846636032186e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.0)), }; 
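The `x1`/`x2`/`x11`/`x12` constants that close out these coefficient tables (just below) store each zero of the Bessel function in split form: `x11/256` is exactly representable in binary, and `x12` carries the residual, so the code can form `(x - x11/256) - x12` without the cancellation a direct `x - x1` could suffer near the zero. A small self-check of that decomposition, using the Y1 values quoted in this hunk (not library code):

```cpp
#include <cstdio>

int main()
{
    const double x1  = 2.1971413260310170351;     // first positive zero of Y1
    const double x11 = 5.620e+02;                 // 562/256 = 2.1953125, exact in binary
    const double x12 = 1.8288260310170351490e-03; // residual: x1 - 562/256

    // The split reconstructs the zero: x11/256 + x12 == x1 (to rounding),
    // and the subtraction x - x11/256 is exact for x near the zero.
    std::printf("%.17g\n", (x11 / 256 + x12) - x1); // ~0
}
```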
- static const T QS[] = { + BOOST_MATH_STATIC const T QS[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 7.0871281941028743574e+05)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.8194580422439972989e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.4194606696037208929e+06)), @@ -117,7 +118,7 @@ T bessel_y1(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.6383677696049909675e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), }; - static const T x1 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.1971413260310170351e+00)), + BOOST_MATH_STATIC const T x1 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.1971413260310170351e+00)), x2 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.4296810407941351328e+00)), x11 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.620e+02)), x12 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.8288260310170351490e-03)), diff --git a/include/boost/math/special_functions/detail/bessel_yn.hpp b/include/boost/math/special_functions/detail/bessel_yn.hpp index 73dee0bbb8..a45d1761cd 100644 --- a/include/boost/math/special_functions/detail/bessel_yn.hpp +++ b/include/boost/math/special_functions/detail/bessel_yn.hpp @@ -10,9 +10,11 @@ #pragma once #endif +#include #include #include #include +#include #include // Bessel function of the second kind of integer order @@ -21,14 +23,14 @@ namespace boost { namespace math { namespace detail{ template -T bessel_yn(int n, T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED T bessel_yn(int n, T x, const Policy& pol) { BOOST_MATH_STD_USING T value, factor, current, prev; using namespace boost::math::tools; - static const char* function = "boost::math::bessel_yn<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::bessel_yn<%1%>(%1%,%1%)"; if ((x == 0) && (n == 0)) { diff --git a/include/boost/math/special_functions/detail/erf_inv.hpp b/include/boost/math/special_functions/detail/erf_inv.hpp index 0054a74266..cb65cffbc1 100644 --- a/include/boost/math/special_functions/detail/erf_inv.hpp +++ b/include/boost/math/special_functions/detail/erf_inv.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -13,6 +14,10 @@ #pragma warning(disable:4702) // Unreachable code: optimization warning #endif +#include + +#ifndef BOOST_MATH_HAS_NVRTC + #include namespace boost{ namespace math{ @@ -23,7 +28,7 @@ namespace detail{ // this version is for 80-bit long double's and smaller: // template -T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constant*) +BOOST_MATH_GPU_ENABLED T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constant&) { BOOST_MATH_STD_USING // for ADL of std names. 
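Alongside the GPU annotations, the `erf_inv_imp` overloads here change their dispatch parameter from a tag *pointer* (callers used to pass a `static_cast` null pointer) to a tag passed by const reference and constructed as a plain `tag_type()` at the call site. Both styles resolve at compile time; a minimal contrast with an illustrative tag (the real tags encode the precision ladder, e.g. 24/53/64/113 bits):

```cpp
#include <iostream>
#include <type_traits>

using tag53 = std::integral_constant<int, 53>;

int imp(const tag53*) { return 0; } // old style: dispatch on a null tag pointer
int imp(const tag53&) { return 1; } // new style: dispatch on a stateless tag object

int main()
{
    std::cout << imp(static_cast<const tag53*>(nullptr)) // picks the pointer overload
              << imp(tag53())                            // picks the reference overload
              << '\n';                                   // prints 01
}
```

Passing the empty tag object avoids the null-pointer cast entirely, which reads more cleanly and costs nothing, since the tag has no state to copy.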
@@ -44,8 +49,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan // Maximum Deviation Found (actual error term at infinite precision) 8.030e-21 // // LCOV_EXCL_START - static const float Y = 0.0891314744949340820313f; - static const T P[] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const float Y = 0.0891314744949340820313f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.000508781949658280665617), BOOST_MATH_BIG_CONSTANT(T, 64, -0.00836874819741736770379), BOOST_MATH_BIG_CONSTANT(T, 64, 0.0334806625409744615033), @@ -55,7 +60,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan BOOST_MATH_BIG_CONSTANT(T, 64, 0.00822687874676915743155), BOOST_MATH_BIG_CONSTANT(T, 64, -0.00538772965071242932965) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, -0.970005043303290640362), BOOST_MATH_BIG_CONSTANT(T, 64, -1.56574558234175846809), @@ -87,8 +92,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan // Maximum Deviation Found (error term) 4.811e-20 // // LCOV_EXCL_START - static const float Y = 2.249481201171875f; - static const T P[] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const float Y = 2.249481201171875f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.202433508355938759655), BOOST_MATH_BIG_CONSTANT(T, 64, 0.105264680699391713268), BOOST_MATH_BIG_CONSTANT(T, 64, 8.37050328343119927838), @@ -99,7 +104,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan BOOST_MATH_BIG_CONSTANT(T, 64, 21.1294655448340526258), BOOST_MATH_BIG_CONSTANT(T, 64, -3.67192254707729348546) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 6.24264124854247537712), BOOST_MATH_BIG_CONSTANT(T, 64, 3.9713437953343869095), @@ -142,8 +147,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan { // LCOV_EXCL_START // Max error found: 1.089051e-20 - static const float Y = 0.807220458984375f; - static const T P[] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const float Y = 0.807220458984375f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.131102781679951906451), BOOST_MATH_BIG_CONSTANT(T, 64, -0.163794047193317060787), BOOST_MATH_BIG_CONSTANT(T, 64, 0.117030156341995252019), @@ -156,7 +161,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan BOOST_MATH_BIG_CONSTANT(T, 64, 0.285225331782217055858e-7), BOOST_MATH_BIG_CONSTANT(T, 64, -0.681149956853776992068e-9) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 3.46625407242567245975), BOOST_MATH_BIG_CONSTANT(T, 64, 5.38168345707006855425), @@ -175,8 +180,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan { // LCOV_EXCL_START // Max error found: 8.389174e-21 - static const float Y = 0.93995571136474609375f; - static const T P[] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const float Y = 0.93995571136474609375f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.0350353787183177984712), BOOST_MATH_BIG_CONSTANT(T, 64, -0.00222426529213447927281), BOOST_MATH_BIG_CONSTANT(T, 64, 0.0185573306514231072324), @@ -187,7 +192,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan BOOST_MATH_BIG_CONSTANT(T, 64, 
-0.230404776911882601748e-9), BOOST_MATH_BIG_CONSTANT(T, 64, 0.266339227425782031962e-11) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 1.3653349817554063097), BOOST_MATH_BIG_CONSTANT(T, 64, 0.762059164553623404043), @@ -205,8 +210,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan { // LCOV_EXCL_START // Max error found: 1.481312e-19 - static const float Y = 0.98362827301025390625f; - static const T P[] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const float Y = 0.98362827301025390625f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.0167431005076633737133), BOOST_MATH_BIG_CONSTANT(T, 64, -0.00112951438745580278863), BOOST_MATH_BIG_CONSTANT(T, 64, 0.00105628862152492910091), @@ -217,7 +222,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan BOOST_MATH_BIG_CONSTANT(T, 64, -0.281128735628831791805e-13), BOOST_MATH_BIG_CONSTANT(T, 64, 0.99055709973310326855e-16) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 0.591429344886417493481), BOOST_MATH_BIG_CONSTANT(T, 64, 0.138151865749083321638), @@ -235,8 +240,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan { // LCOV_EXCL_START // Max error found: 5.697761e-20 - static const float Y = 0.99714565277099609375f; - static const T P[] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const float Y = 0.99714565277099609375f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.0024978212791898131227), BOOST_MATH_BIG_CONSTANT(T, 64, -0.779190719229053954292e-5), BOOST_MATH_BIG_CONSTANT(T, 64, 0.254723037413027451751e-4), @@ -246,7 +251,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan BOOST_MATH_BIG_CONSTANT(T, 64, 0.145596286718675035587e-11), BOOST_MATH_BIG_CONSTANT(T, 64, -0.116765012397184275695e-17) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 0.207123112214422517181), BOOST_MATH_BIG_CONSTANT(T, 64, 0.0169410838120975906478), @@ -264,8 +269,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan { // LCOV_EXCL_START // Max error found: 1.279746e-20 - static const float Y = 0.99941349029541015625f; - static const T P[] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const float Y = 0.99941349029541015625f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.000539042911019078575891), BOOST_MATH_BIG_CONSTANT(T, 64, -0.28398759004727721098e-6), BOOST_MATH_BIG_CONSTANT(T, 64, 0.899465114892291446442e-6), @@ -275,7 +280,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan BOOST_MATH_BIG_CONSTANT(T, 64, 0.135880130108924861008e-14), BOOST_MATH_BIG_CONSTANT(T, 64, -0.348890393399948882918e-21) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 0.0845746234001899436914), BOOST_MATH_BIG_CONSTANT(T, 64, 0.00282092984726264681981), @@ -310,12 +315,13 @@ struct erf_roots }; template -T erf_inv_imp(const T& p, const T& q, const Policy& pol, const std::integral_constant*) +T erf_inv_imp(const T& p, const T& q, const Policy& pol, const std::integral_constant&) { // // Generic version, get a guess that's accurate to 64-bits (10^-19) // - T guess = erf_inv_imp(p, q, pol, static_cast 
const*>(nullptr)); + using tag_type = std::integral_constant; + T guess = erf_inv_imp(p, q, pol, tag_type()); T result; // // If T has more bit's than 64 in it's mantissa then we need to iterate, @@ -344,14 +350,14 @@ T erf_inv_imp(const T& p, const T& q, const Policy& pol, const std::integral_con } // namespace detail template -typename tools::promote_args::type erfc_inv(T z, const Policy& pol) +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type erfc_inv(T z, const Policy& pol) { typedef typename tools::promote_args::type result_type; // // Begin by testing for domain errors, and other special cases: // - static const char* function = "boost::math::erfc_inv<%1%>(%1%, %1%)"; + constexpr auto function = "boost::math::erfc_inv<%1%>(%1%, %1%)"; if((z < 0) || (z > 2)) return policies::raise_domain_error(function, "Argument outside range [0,2] in inverse erfc function (got p=%1%).", z, pol); if(z == 0) @@ -401,18 +407,18 @@ typename tools::promote_args::type erfc_inv(T z, const Policy& pol) // And get the result, negating where required: // return s * policies::checked_narrowing_cast( - detail::erf_inv_imp(static_cast(p), static_cast(q), forwarding_policy(), static_cast(nullptr)), function); + detail::erf_inv_imp(static_cast(p), static_cast(q), forwarding_policy(), tag_type()), function); } template -typename tools::promote_args::type erf_inv(T z, const Policy& pol) +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type erf_inv(T z, const Policy& pol) { typedef typename tools::promote_args::type result_type; // // Begin by testing for domain errors, and other special cases: // - static const char* function = "boost::math::erf_inv<%1%>(%1%, %1%)"; + constexpr auto function = "boost::math::erf_inv<%1%>(%1%, %1%)"; if((z < -1) || (z > 1)) return policies::raise_domain_error(function, "Argument outside range [-1, 1] in inverse erf function (got p=%1%).", z, pol); if(z == 1) @@ -469,17 +475,17 @@ typename tools::promote_args::type erf_inv(T z, const Policy& pol) // And get the result, negating where required: // return s * policies::checked_narrowing_cast( - detail::erf_inv_imp(static_cast(p), static_cast(q), forwarding_policy(), static_cast(nullptr)), function); + detail::erf_inv_imp(static_cast(p), static_cast(q), forwarding_policy(), tag_type()), function); } template -inline typename tools::promote_args::type erfc_inv(T z) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type erfc_inv(T z) { return erfc_inv(z, policies::policy<>()); } template -inline typename tools::promote_args::type erf_inv(T z) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type erf_inv(T z) { return erf_inv(z, policies::policy<>()); } @@ -487,6 +493,64 @@ inline typename tools::promote_args::type erf_inv(T z) } // namespace math } // namespace boost +#else // Special handling for NVRTC + +namespace boost { +namespace math { + +template +BOOST_MATH_GPU_ENABLED auto erf_inv(T x) +{ + return ::erfinv(x); +} + +template <> +BOOST_MATH_GPU_ENABLED auto erf_inv(float x) +{ + return ::erfinvf(x); +} + +template +BOOST_MATH_GPU_ENABLED auto erf_inv(T x, const Policy&) +{ + return ::erfinv(x); +} + +template +BOOST_MATH_GPU_ENABLED auto erf_inv(float x, const Policy&) +{ + return ::erfinvf(x); +} + +template +BOOST_MATH_GPU_ENABLED auto erfc_inv(T x) +{ + return ::erfcinv(x); +} + +template <> +BOOST_MATH_GPU_ENABLED auto erfc_inv(float x) +{ + return ::erfcinvf(x); +} + +template +BOOST_MATH_GPU_ENABLED auto erfc_inv(T x, const Policy&) +{ + return ::erfcinv(x); +} + +template 
+BOOST_MATH_GPU_ENABLED auto erfc_inv(float x, const Policy&)
+{
+ return ::erfcinvf(x);
+}
+
+} // namespace math
+} // namespace boost
+
+#endif // BOOST_MATH_HAS_NVRTC
+
 #ifdef _MSC_VER
 #pragma warning(pop)
 #endif
diff --git a/include/boost/math/special_functions/detail/fp_traits.hpp b/include/boost/math/special_functions/detail/fp_traits.hpp
index 2947a32a21..015ea9cd35 100644
--- a/include/boost/math/special_functions/detail/fp_traits.hpp
+++ b/include/boost/math/special_functions/detail/fp_traits.hpp
@@ -4,6 +4,7 @@
 #define BOOST_MATH_FP_TRAITS_HPP
 // Copyright (c) 2006 Johan Rade
+// Copyright (c) 2024 Matt Borland
 // Distributed under the Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -24,6 +25,7 @@ With these techniques, the code could be simplified.
 #include
 #include
 #include
+#include
 #include
 #include
@@ -202,14 +204,14 @@ template<> struct fp_traits_non_native
 {
 typedef ieee_copy_all_bits_tag method;
- static constexpr uint32_t sign = 0x80000000u;
- static constexpr uint32_t exponent = 0x7f800000;
- static constexpr uint32_t flag = 0x00000000;
- static constexpr uint32_t significand = 0x007fffff;
+ BOOST_MATH_STATIC constexpr uint32_t sign = 0x80000000u;
+ BOOST_MATH_STATIC constexpr uint32_t exponent = 0x7f800000;
+ BOOST_MATH_STATIC constexpr uint32_t flag = 0x00000000;
+ BOOST_MATH_STATIC constexpr uint32_t significand = 0x007fffff;
 typedef uint32_t bits;
- static void get_bits(float x, uint32_t& a) { std::memcpy(&a, &x, 4); }
- static void set_bits(float& x, uint32_t a) { std::memcpy(&x, &a, 4); }
+ BOOST_MATH_GPU_ENABLED static void get_bits(float x, uint32_t& a) { std::memcpy(&a, &x, 4); }
+ BOOST_MATH_GPU_ENABLED static void set_bits(float& x, uint32_t a) { std::memcpy(&x, &a, 4); }
 };
 // ieee_tag version, double (64 bits) ----------------------------------------------
@@ -250,15 +252,15 @@ template<> struct fp_traits_non_native
 {
 typedef ieee_copy_all_bits_tag method;
- static constexpr uint64_t sign = static_cast(0x80000000u) << 32;
- static constexpr uint64_t exponent = static_cast(0x7ff00000) << 32;
- static constexpr uint64_t flag = 0;
- static constexpr uint64_t significand
+ BOOST_MATH_STATIC constexpr uint64_t sign = static_cast(0x80000000u) << 32;
+ BOOST_MATH_STATIC constexpr uint64_t exponent = static_cast(0x7ff00000) << 32;
+ BOOST_MATH_STATIC constexpr uint64_t flag = 0;
+ BOOST_MATH_STATIC constexpr uint64_t significand
 = (static_cast(0x000fffff) << 32) + static_cast(0xffffffffu);
 typedef uint64_t bits;
- static void get_bits(double x, uint64_t& a) { std::memcpy(&a, &x, 8); }
- static void set_bits(double& x, uint64_t a) { std::memcpy(&x, &a, 8); }
+ BOOST_MATH_GPU_ENABLED static void get_bits(double x, uint64_t& a) { std::memcpy(&a, &x, 8); }
+ BOOST_MATH_GPU_ENABLED static void set_bits(double& x, uint64_t a) { std::memcpy(&x, &a, 8); }
 };
 #endif
@@ -330,10 +332,10 @@ struct fp_traits_non_native
 {
 typedef ieee_copy_leading_bits_tag method;
- static constexpr uint32_t sign = 0x80000000u;
- static constexpr uint32_t exponent = 0x7fff0000;
- static constexpr uint32_t flag = 0x00008000;
- static constexpr uint32_t significand = 0x00007fff;
+ BOOST_MATH_STATIC constexpr uint32_t sign = 0x80000000u;
+ BOOST_MATH_STATIC constexpr uint32_t exponent = 0x7fff0000;
+ BOOST_MATH_STATIC constexpr uint32_t flag = 0x00008000;
+ BOOST_MATH_STATIC constexpr uint32_t significand = 0x00007fff;
 typedef uint32_t bits;
@@ -381,10 +383,10 @@ struct fp_traits_non_native
 {
 typedef ieee_copy_leading_bits_tag method;
- static constexpr uint32_t
sign = 0x80000000u; - static constexpr uint32_t exponent = 0x7ff00000; - static constexpr uint32_t flag = 0x00000000; - static constexpr uint32_t significand = 0x000fffff; + BOOST_MATH_STATIC constexpr uint32_t sign = 0x80000000u; + BOOST_MATH_STATIC constexpr uint32_t exponent = 0x7ff00000; + BOOST_MATH_STATIC constexpr uint32_t flag = 0x00000000; + BOOST_MATH_STATIC constexpr uint32_t significand = 0x000fffff; typedef uint32_t bits; @@ -399,7 +401,7 @@ struct fp_traits_non_native } private: - static constexpr int offset_ = BOOST_MATH_ENDIAN_BIG_BYTE ? 0 : 12; + BOOST_MATH_STATIC constexpr int offset_ = BOOST_MATH_ENDIAN_BIG_BYTE ? 0 : 12; }; @@ -419,10 +421,10 @@ struct fp_traits_non_native { typedef ieee_copy_leading_bits_tag method; - static constexpr uint32_t sign = 0x80000000u; - static constexpr uint32_t exponent = 0x7fff0000; - static constexpr uint32_t flag = 0x00008000; - static constexpr uint32_t significand = 0x00007fff; + BOOST_MATH_STATIC constexpr uint32_t sign = 0x80000000u; + BOOST_MATH_STATIC constexpr uint32_t exponent = 0x7fff0000; + BOOST_MATH_STATIC constexpr uint32_t flag = 0x00008000; + BOOST_MATH_STATIC constexpr uint32_t significand = 0x00007fff; // copy 1st, 2nd, 5th and 6th byte. 3rd and 4th byte are padding. @@ -455,10 +457,10 @@ struct fp_traits_non_native { typedef ieee_copy_leading_bits_tag method; - static constexpr uint32_t sign = 0x80000000u; - static constexpr uint32_t exponent = 0x7fff0000; - static constexpr uint32_t flag = 0x00000000; - static constexpr uint32_t significand = 0x0000ffff; + BOOST_MATH_STATIC constexpr uint32_t sign = 0x80000000u; + BOOST_MATH_STATIC constexpr uint32_t exponent = 0x7fff0000; + BOOST_MATH_STATIC constexpr uint32_t flag = 0x00000000; + BOOST_MATH_STATIC constexpr uint32_t significand = 0x0000ffff; typedef uint32_t bits; @@ -473,7 +475,7 @@ struct fp_traits_non_native } private: - static constexpr int offset_ = BOOST_MATH_ENDIAN_BIG_BYTE ? 0 : 12; + BOOST_MATH_STATIC constexpr int offset_ = BOOST_MATH_ENDIAN_BIG_BYTE ? 0 : 12; }; #endif @@ -553,7 +555,8 @@ struct select_native && !defined(BOOST_MATH_DISABLE_STD_FPCLASSIFY)\ && !defined(__INTEL_COMPILER)\ && !defined(sun)\ - && !defined(__VXWORKS__) + && !defined(__VXWORKS__)\ + && !defined(BOOST_MATH_HAS_GPU_SUPPORT) # define BOOST_MATH_USE_STD_FPCLASSIFY #endif diff --git a/include/boost/math/special_functions/detail/gamma_inva.hpp b/include/boost/math/special_functions/detail/gamma_inva.hpp index 75ac89e433..8c3be8ef1a 100644 --- a/include/boost/math/special_functions/detail/gamma_inva.hpp +++ b/include/boost/math/special_functions/detail/gamma_inva.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -17,16 +18,23 @@ #pragma once #endif -#include +#include #include -namespace boost{ namespace math{ namespace detail{ +namespace boost{ namespace math{ + +#ifdef BOOST_MATH_HAS_NVRTC +template +BOOST_MATH_GPU_ENABLED auto erfc_inv(T x, const Policy&); +#endif + +namespace detail{ template struct gamma_inva_t { - gamma_inva_t(T z_, T p_, bool invert_) : z(z_), p(p_), invert(invert_) {} - T operator()(T a) + BOOST_MATH_GPU_ENABLED gamma_inva_t(T z_, T p_, bool invert_) : z(z_), p(p_), invert(invert_) {} + BOOST_MATH_GPU_ENABLED T operator()(T a) { return invert ? 
p - boost::math::gamma_q(a, z, Policy()) : boost::math::gamma_p(a, z, Policy()) - p; } @@ -36,7 +44,7 @@ struct gamma_inva_t }; template -T inverse_poisson_cornish_fisher(T lambda, T p, T q, const Policy& pol) +BOOST_MATH_GPU_ENABLED T inverse_poisson_cornish_fisher(T lambda, T p, T q, const Policy& pol) { BOOST_MATH_STD_USING // mean: @@ -67,7 +75,7 @@ T inverse_poisson_cornish_fisher(T lambda, T p, T q, const Policy& pol) } template -T gamma_inva_imp(const T& z, const T& p, const T& q, const Policy& pol) +BOOST_MATH_GPU_ENABLED T gamma_inva_imp(const T& z, const T& p, const T& q, const Policy& pol) { BOOST_MATH_STD_USING // for ADL of std lib math functions // @@ -151,7 +159,7 @@ T gamma_inva_imp(const T& z, const T& p, const T& q, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type gamma_p_inva(T1 x, T2 p, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -181,7 +189,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type gamma_q_inva(T1 x, T2 q, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -211,14 +219,14 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type gamma_p_inva(T1 x, T2 p) { return boost::math::gamma_p_inva(x, p, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type gamma_q_inva(T1 x, T2 q) { return boost::math::gamma_q_inva(x, q, policies::policy<>()); diff --git a/include/boost/math/special_functions/detail/ibeta_inv_ab.hpp b/include/boost/math/special_functions/detail/ibeta_inv_ab.hpp index 0ce0d7560e..aab18f50f1 100644 --- a/include/boost/math/special_functions/detail/ibeta_inv_ab.hpp +++ b/include/boost/math/special_functions/detail/ibeta_inv_ab.hpp @@ -17,17 +17,19 @@ #pragma once #endif -#include -#include +#include #include +#include +#include +#include namespace boost{ namespace math{ namespace detail{ template struct beta_inv_ab_t { - beta_inv_ab_t(T b_, T z_, T p_, bool invert_, bool swap_ab_) : b(b_), z(z_), p(p_), invert(invert_), swap_ab(swap_ab_) {} - T operator()(T a) + BOOST_MATH_GPU_ENABLED beta_inv_ab_t(T b_, T z_, T p_, bool invert_, bool swap_ab_) : b(b_), z(z_), p(p_), invert(invert_), swap_ab(swap_ab_) {} + BOOST_MATH_GPU_ENABLED T operator()(T a) { return invert ? p - boost::math::ibetac(swap_ab ? b : a, swap_ab ? 
a : b, z, Policy()) @@ -39,7 +41,7 @@ struct beta_inv_ab_t }; template -T inverse_negative_binomial_cornish_fisher(T n, T sf, T sfc, T p, T q, const Policy& pol) +BOOST_MATH_GPU_ENABLED T inverse_negative_binomial_cornish_fisher(T n, T sf, T sfc, T p, T q, const Policy& pol) { BOOST_MATH_STD_USING // mean: @@ -72,7 +74,7 @@ T inverse_negative_binomial_cornish_fisher(T n, T sf, T sfc, T p, T q, const Pol } template -T ibeta_inv_ab_imp(const T& b, const T& z, const T& p, const T& q, bool swap_ab, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ibeta_inv_ab_imp(const T& b, const T& z, const T& p, const T& q, bool swap_ab, const Policy& pol) { BOOST_MATH_STD_USING // for ADL of std lib math functions // @@ -121,11 +123,11 @@ T ibeta_inv_ab_imp(const T& b, const T& z, const T& p, const T& q, bool swap_ab, // if((p < q) != swap_ab) { - guess = (std::min)(T(b * 2), T(1)); + guess = BOOST_MATH_GPU_SAFE_MIN(T(b * 2), T(1)); } else { - guess = (std::min)(T(b / 2), T(1)); + guess = BOOST_MATH_GPU_SAFE_MIN(T(b / 2), T(1)); } } if(n * n * n * u * sf > 0.005) @@ -138,11 +140,11 @@ T ibeta_inv_ab_imp(const T& b, const T& z, const T& p, const T& q, bool swap_ab, // if((p < q) != swap_ab) { - guess = (std::min)(T(b * 2), T(10)); + guess = BOOST_MATH_GPU_SAFE_MIN(T(b * 2), T(10)); } else { - guess = (std::min)(T(b / 2), T(10)); + guess = BOOST_MATH_GPU_SAFE_MIN(T(b / 2), T(10)); } } else @@ -151,8 +153,8 @@ T ibeta_inv_ab_imp(const T& b, const T& z, const T& p, const T& q, bool swap_ab, // // Max iterations permitted: // - std::uintmax_t max_iter = policies::get_max_root_iterations(); - std::pair r = bracket_and_solve_root(f, guess, factor, swap_ab ? true : false, tol, max_iter, pol); + boost::math::uintmax_t max_iter = policies::get_max_root_iterations(); + boost::math::pair r = bracket_and_solve_root(f, guess, factor, swap_ab ? 
true : false, tol, max_iter, pol); if(max_iter >= policies::get_max_root_iterations()) return policies::raise_evaluation_error("boost::math::ibeta_invab_imp<%1%>(%1%,%1%,%1%)", "Unable to locate the root within a reasonable number of iterations, closest approximation so far was %1%", r.first, pol); return (r.first + r.second) / 2; @@ -161,7 +163,7 @@ T ibeta_inv_ab_imp(const T& b, const T& z, const T& p, const T& q, bool swap_ab, } // namespace detail template -typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ibeta_inva(RT1 b, RT2 x, RT3 p, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -173,7 +175,7 @@ typename tools::promote_args::type policies::discrete_quantile<>, policies::assert_undefined<> >::type forwarding_policy; - static const char* function = "boost::math::ibeta_inva<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "boost::math::ibeta_inva<%1%>(%1%,%1%,%1%)"; if(p == 0) { return policies::raise_overflow_error(function, 0, Policy()); @@ -185,28 +187,28 @@ typename tools::promote_args::type return policies::checked_narrowing_cast( detail::ibeta_inv_ab_imp( - static_cast(b), - static_cast(x), - static_cast(p), - static_cast(1 - static_cast(p)), - false, pol), + static_cast(b), + static_cast(x), + static_cast(p), + static_cast(1 - static_cast(p)), + false, pol), function); } template -typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ibetac_inva(RT1 b, RT2 x, RT3 q, const Policy& pol) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; typedef typename policies::normalise< - Policy, - policies::promote_float, - policies::promote_double, + Policy, + policies::promote_float, + policies::promote_double, policies::discrete_quantile<>, policies::assert_undefined<> >::type forwarding_policy; - static const char* function = "boost::math::ibetac_inva<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "boost::math::ibetac_inva<%1%>(%1%,%1%,%1%)"; if(q == 1) { return policies::raise_overflow_error(function, 0, Policy()); @@ -218,28 +220,28 @@ typename tools::promote_args::type return policies::checked_narrowing_cast( detail::ibeta_inv_ab_imp( - static_cast(b), - static_cast(x), - static_cast(1 - static_cast(q)), - static_cast(q), + static_cast(b), + static_cast(x), + static_cast(1 - static_cast(q)), + static_cast(q), false, pol), function); } template -typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ibeta_invb(RT1 a, RT2 x, RT3 p, const Policy& pol) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; typedef typename policies::normalise< - Policy, - policies::promote_float, - policies::promote_double, + Policy, + policies::promote_float, + policies::promote_double, policies::discrete_quantile<>, policies::assert_undefined<> >::type forwarding_policy; - static const char* function = "boost::math::ibeta_invb<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "boost::math::ibeta_invb<%1%>(%1%,%1%,%1%)"; if(p == 0) { return tools::min_value(); @@ -251,19 +253,19 @@ typename tools::promote_args::type return policies::checked_narrowing_cast( detail::ibeta_inv_ab_imp( - static_cast(a), - static_cast(x), - static_cast(p), - static_cast(1 - static_cast(p)), + static_cast(a), + static_cast(x), + static_cast(p), + static_cast(1 - static_cast(p)), true, pol), function); } template -typename tools::promote_args::type 
+BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ibetac_invb(RT1 a, RT2 x, RT3 q, const Policy& pol) { - static const char* function = "boost::math::ibeta_invb<%1%>(%1%, %1%, %1%)"; + constexpr auto function = "boost::math::ibeta_invb<%1%>(%1%, %1%, %1%)"; typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; typedef typename policies::normalise< @@ -293,28 +295,28 @@ typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibeta_inva(RT1 b, RT2 x, RT3 p) { return boost::math::ibeta_inva(b, x, p, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibetac_inva(RT1 b, RT2 x, RT3 q) { return boost::math::ibetac_inva(b, x, q, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibeta_invb(RT1 a, RT2 x, RT3 p) { return boost::math::ibeta_invb(a, x, p, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibetac_invb(RT1 a, RT2 x, RT3 q) { return boost::math::ibetac_invb(a, x, q, policies::policy<>()); diff --git a/include/boost/math/special_functions/detail/ibeta_inverse.hpp b/include/boost/math/special_functions/detail/ibeta_inverse.hpp index 70f17a0b1a..6f222cf77d 100644 --- a/include/boost/math/special_functions/detail/ibeta_inverse.hpp +++ b/include/boost/math/special_functions/detail/ibeta_inverse.hpp @@ -11,12 +11,14 @@ #pragma once #endif +#include +#include +#include +#include #include #include -#include #include #include -#include namespace boost{ namespace math{ namespace detail{ @@ -27,12 +29,12 @@ namespace boost{ namespace math{ namespace detail{ template struct temme_root_finder { - temme_root_finder(const T t_, const T a_) : t(t_), a(a_) { + BOOST_MATH_GPU_ENABLED temme_root_finder(const T t_, const T a_) : t(t_), a(a_) { BOOST_MATH_ASSERT( math::tools::epsilon() <= a && !(boost::math::isinf)(a)); } - boost::math::tuple operator()(T x) + BOOST_MATH_GPU_ENABLED boost::math::tuple operator()(T x) { BOOST_MATH_STD_USING // ADL of std names @@ -52,7 +54,7 @@ struct temme_root_finder // Section 2. // template -T temme_method_1_ibeta_inverse(T a, T b, T z, const Policy& pol) +BOOST_MATH_GPU_ENABLED T temme_method_1_ibeta_inverse(T a, T b, T z, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names @@ -138,7 +140,7 @@ T temme_method_1_ibeta_inverse(T a, T b, T z, const Policy& pol) // Section 3. // template -T temme_method_2_ibeta_inverse(T /*a*/, T /*b*/, T z, T r, T theta, const Policy& pol) +BOOST_MATH_GPU_ENABLED T temme_method_2_ibeta_inverse(T /*a*/, T /*b*/, T z, T r, T theta, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names @@ -302,9 +304,23 @@ T temme_method_2_ibeta_inverse(T /*a*/, T /*b*/, T z, T r, T theta, const Policy // // And iterate: // - x = tools::newton_raphson_iterate( - temme_root_finder(-lu, alpha), x, lower, upper, policies::digits() / 2); - +#ifndef BOOST_MATH_NO_EXCEPTIONS + try { +#endif + x = tools::newton_raphson_iterate( + temme_root_finder(-lu, alpha), x, lower, upper, policies::digits() / 2); +#ifndef BOOST_MATH_NO_EXCEPTIONS + } + catch (const boost::math::evaluation_error&) + { + // Due to numerical instability we may have cases where no root is found when + // in fact we should just touch the origin. 
We simply ignore the error here + // and return our best guess for x so far... + // Maybe we should special case the symmetrical parameter case, but it's not clear + // whether that is the only situation when problems can occur. + // See https://github.com/boostorg/math/issues/1169 + } +#endif return x; } // @@ -315,10 +331,11 @@ T temme_method_2_ibeta_inverse(T /*a*/, T /*b*/, T z, T r, T theta, const Policy // Section 4. // template -T temme_method_3_ibeta_inverse(T a, T b, T p, T q, const Policy& pol) +BOOST_MATH_GPU_ENABLED T temme_method_3_ibeta_inverse(T a, T b, T p, T q, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names + // // Begin by getting an initial approximation for the quantity // eta from the dominant part of the incomplete beta: @@ -420,10 +437,10 @@ T temme_method_3_ibeta_inverse(T a, T b, T p, T q, const Policy& pol) template struct ibeta_roots { - ibeta_roots(T _a, T _b, T t, bool inv = false) + BOOST_MATH_GPU_ENABLED ibeta_roots(T _a, T _b, T t, bool inv = false) : a(_a), b(_b), target(t), invert(inv) {} - boost::math::tuple operator()(T x) + BOOST_MATH_GPU_ENABLED boost::math::tuple operator()(T x) { BOOST_MATH_STD_USING // ADL of std names @@ -457,7 +474,7 @@ struct ibeta_roots }; template -T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) +BOOST_MATH_GPU_ENABLED T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) { BOOST_MATH_STD_USING // For ADL of math functions. @@ -487,8 +504,8 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) return p; } // Change things around so we can handle as b == 1 special case below: - std::swap(a, b); - std::swap(p, q); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(p, q); invert = true; } // @@ -524,8 +541,8 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) } else if(b > 0.5f) { - std::swap(a, b); - std::swap(p, q); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(p, q); invert = !invert; } } @@ -559,7 +576,7 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) y = -boost::math::expm1(boost::math::log1p(-q, pol) / a, pol); } if(invert) - std::swap(x, y); + BOOST_MATH_GPU_SAFE_SWAP(x, y); if(py) *py = y; return x; @@ -574,12 +591,12 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) // if(p > 0.5) { - std::swap(a, b); - std::swap(p, q); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(p, q); invert = !invert; } - T minv = (std::min)(a, b); - T maxv = (std::max)(a, b); + T minv = BOOST_MATH_GPU_SAFE_MIN(a, b); + T maxv = BOOST_MATH_GPU_SAFE_MAX(a, b); if((sqrt(minv) > (maxv - minv)) && (minv > 5)) { // @@ -630,8 +647,8 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) // if(a < b) { - std::swap(a, b); - std::swap(p, q); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(p, q); invert = !invert; } // @@ -694,8 +711,8 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) } if(fs < 0) { - std::swap(a, b); - std::swap(p, q); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(p, q); invert = !invert; xs = 1 - xs; } @@ -758,9 +775,9 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) if(ps < 0) { - std::swap(a, b); - std::swap(p, q); - std::swap(xs, xs2); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(p, q); + BOOST_MATH_GPU_SAFE_SWAP(xs, xs2); invert = !invert; } // @@ -823,8 +840,8 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) // if(b < a) { - std::swap(a, b); - std::swap(p, q); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + 
BOOST_MATH_GPU_SAFE_SWAP(p, q); invert = !invert; } if (a < tools::min_value()) @@ -890,9 +907,9 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) // if(x > 0.5) { - std::swap(a, b); - std::swap(p, q); - std::swap(x, y); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(p, q); + BOOST_MATH_GPU_SAFE_SWAP(x, y); invert = !invert; T l = 1 - upper; T u = 1 - lower; @@ -922,8 +939,8 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) if(x < lower) x = lower; } - std::uintmax_t max_iter = policies::get_max_root_iterations(); - std::uintmax_t max_iter_used = 0; + boost::math::uintmax_t max_iter = policies::get_max_root_iterations(); + boost::math::uintmax_t max_iter_used = 0; // // Figure out how many digits to iterate towards: // @@ -946,7 +963,13 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) // Now iterate, we can use either p or q as the target here // depending on which is smaller: // + // Since we can't use halley_iterate on device we use newton raphson + // + #ifndef BOOST_MATH_HAS_GPU_SUPPORT x = boost::math::tools::halley_iterate( + #else + x = boost::math::tools::newton_raphson_iterate( + #endif boost::math::detail::ibeta_roots(a, b, (p < q ? p : q), (p < q ? false : true)), x, lower, upper, digits, max_iter); policies::check_root_iterations("boost::math::ibeta<%1%>(%1%, %1%, %1%)", max_iter + max_iter_used, pol); // @@ -968,10 +991,10 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) } // namespace detail template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibeta_inv(T1 a, T2 b, T3 p, T4* py, const Policy& pol) { - static const char* function = "boost::math::ibeta_inv<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "boost::math::ibeta_inv<%1%>(%1%,%1%,%1%)"; BOOST_FPU_EXCEPTION_GUARD typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -1003,14 +1026,14 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibeta_inv(T1 a, T2 b, T3 p, T4* py) { return ibeta_inv(a, b, p, py, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibeta_inv(T1 a, T2 b, T3 p) { typedef typename tools::promote_args::type result_type; @@ -1018,7 +1041,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibeta_inv(T1 a, T2 b, T3 p, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -1026,10 +1049,10 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibetac_inv(T1 a, T2 b, T3 q, T4* py, const Policy& pol) { - static const char* function = "boost::math::ibetac_inv<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "boost::math::ibetac_inv<%1%>(%1%,%1%,%1%)"; BOOST_FPU_EXCEPTION_GUARD typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -1061,14 +1084,14 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibetac_inv(T1 a, T2 b, T3 q, T4* py) { return ibetac_inv(a, b, q, py, policies::policy<>()); } 
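// [Reviewer note] The hunk above falls back from halley_iterate to
// newton_raphson_iterate when BOOST_MATH_HAS_GPU_SUPPORT is defined.  A
// minimal, self-contained sketch of the two update rules being traded off
// (illustrative only -- not the library's root finders), solving
// f(x) = x*x - a:

#include <cstdio>

double newton_step(double x, double a)
{
   const double f  = x * x - a;                       // f(x)
   const double f1 = 2 * x;                           // f'(x)
   return x - f / f1;                                 // Newton: x - f/f'
}

double halley_step(double x, double a)
{
   const double f  = x * x - a;
   const double f1 = 2 * x;
   const double f2 = 2;                               // f''(x)
   return x - (2 * f * f1) / (2 * f1 * f1 - f * f2);  // Halley: cubic convergence
}

int main()
{
   double xn = 1.5, xh = 1.5;                         // same starting guess
   for (int i = 0; i < 5; ++i) { xn = newton_step(xn, 2.0); xh = halley_step(xh, 2.0); }
   std::printf("newton: %.17g\nhalley: %.17g\n", xn, xh); // both converge to sqrt(2)
}

// Halley trades an extra derivative evaluation for fewer iterations; dropping
// to Newton-Raphson on device avoids the host-only halley_iterate machinery.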
 template <class RT1, class RT2, class RT3>
-inline typename tools::promote_args<RT1, RT2, RT3>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<RT1, RT2, RT3>::type
    ibetac_inv(RT1 a, RT2 b, RT3 q)
 {
    typedef typename tools::promote_args<RT1, RT2, RT3>::type result_type;
@@ -1076,7 +1099,7 @@ inline typename tools::promote_args<RT1, RT2, RT3>::type
 }
 
 template <class RT1, class RT2, class RT3, class Policy>
-inline typename tools::promote_args<RT1, RT2, RT3>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<RT1, RT2, RT3>::type
    ibetac_inv(RT1 a, RT2 b, RT3 q, const Policy& pol)
 {
    typedef typename tools::promote_args<RT1, RT2, RT3>::type result_type;
diff --git a/include/boost/math/special_functions/detail/iconv.hpp b/include/boost/math/special_functions/detail/iconv.hpp
index 90b4aa9381..20889d411e 100644
--- a/include/boost/math/special_functions/detail/iconv.hpp
+++ b/include/boost/math/special_functions/detail/iconv.hpp
@@ -10,28 +10,29 @@
 #pragma once
 #endif
 
-#include <type_traits>
+#include <boost/math/tools/config.hpp>
+#include <boost/math/tools/type_traits.hpp>
 #include <boost/math/special_functions/round.hpp>
 
 namespace boost { namespace math { namespace detail{
 
 template <class T, class Policy>
-inline int iconv_imp(T v, Policy const&, std::true_type const&)
+BOOST_MATH_GPU_ENABLED inline int iconv_imp(T v, Policy const&, boost::math::true_type const&)
 {
    return static_cast<int>(v);
 }
 
 template <class T, class Policy>
-inline int iconv_imp(T v, Policy const& pol, std::false_type const&)
+BOOST_MATH_GPU_ENABLED inline int iconv_imp(T v, Policy const& pol, boost::math::false_type const&)
 {
    BOOST_MATH_STD_USING
    return iround(v, pol);
 }
 
 template <class T, class Policy>
-inline int iconv(T v, Policy const& pol)
+BOOST_MATH_GPU_ENABLED inline int iconv(T v, Policy const& pol)
 {
-   typedef typename std::is_convertible<T, int>::type tag_type;
+   typedef typename boost::math::is_convertible<T, int>::type tag_type;
    return iconv_imp(v, pol, tag_type());
 }
diff --git a/include/boost/math/special_functions/detail/igamma_inverse.hpp b/include/boost/math/special_functions/detail/igamma_inverse.hpp
index f6bbcd72d5..4efd4f78a3 100644
--- a/include/boost/math/special_functions/detail/igamma_inverse.hpp
+++ b/include/boost/math/special_functions/detail/igamma_inverse.hpp
@@ -1,4 +1,5 @@
 // (C) Copyright John Maddock 2006.
+// (C) Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,6 +11,8 @@ #pragma once #endif +#include +#include #include #include #include @@ -21,7 +24,7 @@ namespace boost{ namespace math{ namespace detail{ template -T find_inverse_s(T p, T q) +BOOST_MATH_GPU_ENABLED T find_inverse_s(T p, T q) { // // Computation of the Incomplete Gamma Function Ratios and their Inverse @@ -41,8 +44,8 @@ T find_inverse_s(T p, T q) { t = sqrt(-2 * log(q)); } - static const double a[4] = { 3.31125922108741, 11.6616720288968, 4.28342155967104, 0.213623493715853 }; - static const double b[5] = { 1, 6.61053765625462, 6.40691597760039, 1.27364489782223, 0.3611708101884203e-1 }; + BOOST_MATH_STATIC const double a[4] = { 3.31125922108741, 11.6616720288968, 4.28342155967104, 0.213623493715853 }; + BOOST_MATH_STATIC const double b[5] = { 1, 6.61053765625462, 6.40691597760039, 1.27364489782223, 0.3611708101884203e-1 }; T s = t - tools::evaluate_polynomial(a, t) / tools::evaluate_polynomial(b, t); if(p < T(0.5)) s = -s; @@ -50,7 +53,7 @@ T find_inverse_s(T p, T q) } template -T didonato_SN(T a, T x, unsigned N, T tolerance = 0) +BOOST_MATH_GPU_ENABLED T didonato_SN(T a, T x, unsigned N, T tolerance = 0) { // // Computation of the Incomplete Gamma Function Ratios and their Inverse @@ -77,7 +80,7 @@ T didonato_SN(T a, T x, unsigned N, T tolerance = 0) } template -inline T didonato_FN(T p, T a, T x, unsigned N, T tolerance, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T didonato_FN(T p, T a, T x, unsigned N, T tolerance, const Policy& pol) { // // Computation of the Incomplete Gamma Function Ratios and their Inverse @@ -93,7 +96,7 @@ inline T didonato_FN(T p, T a, T x, unsigned N, T tolerance, const Policy& pol) } template -T find_inverse_gamma(T a, T p, T q, const Policy& pol, bool* p_has_10_digits) +BOOST_MATH_GPU_ENABLED T find_inverse_gamma(T a, T p, T q, const Policy& pol, bool* p_has_10_digits) { // // In order to understand what's going on here, you will @@ -233,7 +236,7 @@ T find_inverse_gamma(T a, T p, T q, const Policy& pol, bool* p_has_10_digits) } else { - T D = (std::max)(T(2), T(a * (a - 1))); + T D = BOOST_MATH_GPU_SAFE_MAX(T(2), T(a * (a - 1))); T lg = boost::math::lgamma(a, pol); T lb = log(q) + lg; if(lb < -D * T(2.3)) @@ -315,7 +318,7 @@ T find_inverse_gamma(T a, T p, T q, const Policy& pol, bool* p_has_10_digits) template struct gamma_p_inverse_func { - gamma_p_inverse_func(T a_, T p_, bool inv) : a(a_), p(p_), invert(inv) + BOOST_MATH_GPU_ENABLED gamma_p_inverse_func(T a_, T p_, bool inv) : a(a_), p(p_), invert(inv) { // // If p is too near 1 then P(x) - p suffers from cancellation @@ -333,7 +336,7 @@ struct gamma_p_inverse_func } } - boost::math::tuple operator()(const T& x)const + BOOST_MATH_GPU_ENABLED boost::math::tuple operator()(const T& x)const { BOOST_FPU_EXCEPTION_GUARD // @@ -395,11 +398,11 @@ struct gamma_p_inverse_func }; template -T gamma_p_inv_imp(T a, T p, const Policy& pol) +BOOST_MATH_GPU_ENABLED T gamma_p_inv_imp(T a, T p, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std functions. 
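// [Reviewer note] gamma_p_inverse_func above packages the target function
// together with its derivatives so that one evaluation feeds each iteration
// step.  A hedged sketch of that functor shape for a simpler target,
// g(x) = x^3 - c (std::tuple stands in for boost::math::tuple here):

#include <tuple>

template <class T>
struct cube_root_func
{
   explicit cube_root_func(T c) : c_(c) {}

   // Return (g, g', g''): Halley consumes all three elements,
   // Newton-Raphson only the first two.
   std::tuple<T, T, T> operator()(const T& x) const
   {
      return std::make_tuple(x * x * x - c_, 3 * x * x, 6 * x);
   }

private:
   T c_;
};

// An object of this shape is what halley_iterate / newton_raphson_iterate
// receive as their first argument in the code above.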
- static const char* function = "boost::math::gamma_p_inv<%1%>(%1%, %1%)"; + constexpr auto function = "boost::math::gamma_p_inv<%1%>(%1%, %1%)"; BOOST_MATH_INSTRUMENT_VARIABLE(a); BOOST_MATH_INSTRUMENT_VARIABLE(p); @@ -442,7 +445,9 @@ T gamma_p_inv_imp(T a, T p, const Policy& pol) // // Go ahead and iterate: // - std::uintmax_t max_iter = policies::get_max_root_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_root_iterations(); + + #ifndef BOOST_MATH_HAS_GPU_SUPPORT guess = tools::halley_iterate( detail::gamma_p_inverse_func(a, p, false), guess, @@ -450,6 +455,16 @@ T gamma_p_inv_imp(T a, T p, const Policy& pol) tools::max_value(), digits, max_iter); + #else + guess = tools::newton_raphson_iterate( + detail::gamma_p_inverse_func(a, p, false), + guess, + lower, + tools::max_value(), + digits, + max_iter); + #endif + policies::check_root_iterations(function, max_iter, pol); BOOST_MATH_INSTRUMENT_VARIABLE(guess); if(guess == lower) @@ -458,11 +473,11 @@ T gamma_p_inv_imp(T a, T p, const Policy& pol) } template -T gamma_q_inv_imp(T a, T q, const Policy& pol) +BOOST_MATH_GPU_ENABLED T gamma_q_inv_imp(T a, T q, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std functions. - static const char* function = "boost::math::gamma_q_inv<%1%>(%1%, %1%)"; + constexpr auto function = "boost::math::gamma_q_inv<%1%>(%1%, %1%)"; if(a <= 0) return policies::raise_domain_error(function, "Argument a in the incomplete gamma function inverse must be >= 0 (got a=%1%).", a, pol); @@ -501,7 +516,9 @@ T gamma_q_inv_imp(T a, T q, const Policy& pol) // // Go ahead and iterate: // - std::uintmax_t max_iter = policies::get_max_root_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_root_iterations(); + + #ifndef BOOST_MATH_HAS_GPU_SUPPORT guess = tools::halley_iterate( detail::gamma_p_inverse_func(a, q, true), guess, @@ -509,6 +526,16 @@ T gamma_q_inv_imp(T a, T q, const Policy& pol) tools::max_value(), digits, max_iter); + #else + guess = tools::newton_raphson_iterate( + detail::gamma_p_inverse_func(a, q, true), + guess, + lower, + tools::max_value(), + digits, + max_iter); + #endif + policies::check_root_iterations(function, max_iter, pol); if(guess == lower) guess = policies::raise_underflow_error(function, "Expected result known to be non-zero, but is smaller than the smallest available number.", pol); @@ -518,7 +545,7 @@ T gamma_q_inv_imp(T a, T q, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type gamma_p_inv(T1 a, T2 p, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -528,7 +555,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type gamma_q_inv(T1 a, T2 p, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -538,14 +565,14 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type gamma_p_inv(T1 a, T2 p) { return gamma_p_inv(a, p, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type gamma_q_inv(T1 a, T2 p) { return gamma_q_inv(a, p, policies::policy<>()); diff --git a/include/boost/math/special_functions/detail/igamma_large.hpp b/include/boost/math/special_functions/detail/igamma_large.hpp index 
5483b53fb6..8e0ad1b0dd 100644 --- a/include/boost/math/special_functions/detail/igamma_large.hpp +++ b/include/boost/math/special_functions/detail/igamma_large.hpp @@ -1,4 +1,5 @@ // Copyright John Maddock 2006. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -59,13 +60,16 @@ #pragma GCC system_header #endif +#include +#include + namespace boost{ namespace math{ namespace detail{ // This version will never be called (at runtime), it's a stub used // when T is unsuitable to be passed to these routines: // template -inline T igamma_temme_large(T, T, const Policy& /* pol */, std::integral_constant const *) +BOOST_MATH_GPU_ENABLED inline T igamma_temme_large(T, T, const Policy& /* pol */, const boost::math::integral_constant&) { // stub function, should never actually be called BOOST_MATH_ASSERT(0); @@ -75,8 +79,11 @@ inline T igamma_temme_large(T, T, const Policy& /* pol */, std::integral_constan // This version is accurate for up to 64-bit mantissa's, // (80-bit long double, or 10^-20). // + +#ifndef BOOST_MATH_HAS_GPU_SUPPORT + template -T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant const *) +BOOST_MATH_GPU_ENABLED T igamma_temme_large(T a, T x, const Policy& pol, const boost::math::integral_constant&) { BOOST_MATH_STD_USING // ADL of std functions T sigma = (x - a) / a; @@ -88,7 +95,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant -T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant const *) +BOOST_MATH_GPU_ENABLED T igamma_temme_large(T a, T x, const Policy& pol, const boost::math::integral_constant&) { BOOST_MATH_STD_USING // ADL of std functions T sigma = (x - a) / a; @@ -293,7 +303,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(-0.33333333333333333L), static_cast(0.083333333333333333L), static_cast(-0.014814814814814815L), @@ -312,7 +322,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(-0.0018518518518518519L), static_cast(-0.0034722222222222222L), static_cast(0.0026455026455026455L), @@ -329,7 +339,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(0.0041335978835978836L), static_cast(-0.0026813271604938272L), static_cast(0.00077160493827160494L), @@ -344,7 +354,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(0.00064943415637860082L), static_cast(0.00022947209362139918L), static_cast(-0.00046918949439525571L), @@ -357,7 +367,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(-0.0008618882909167117L), static_cast(0.00078403922172006663L), static_cast(-0.00029907248030319018L), @@ -368,7 +378,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(-0.00033679855336635815L), static_cast(-0.69728137583658578e-4L), static_cast(0.00027727532449593921L), @@ -381,7 +391,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(0.00053130793646399222L), static_cast(-0.00059216643735369388L), static_cast(0.00027087820967180448L), @@ -392,7 +402,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(0.00034436760689237767L), static_cast(0.51717909082605922e-4L), static_cast(-0.00033493161081142236L), @@ -401,7 +411,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(-0.00065262391859530942L), 
static_cast(0.00083949872067208728L), static_cast(-0.00043829709854172101L), @@ -414,7 +424,18 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant) + { + result += ::erfcf(::sqrtf(y)) / 2; + } + else + { + result += ::erfc(::sqrt(y)) / 2; + } + #else result += boost::math::erfc(sqrt(y), pol) / 2; + #endif return result; } @@ -423,7 +444,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant -T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant const *) +BOOST_MATH_GPU_ENABLED T igamma_temme_large(T a, T x, const Policy& pol, const boost::math::integral_constant&) { BOOST_MATH_STD_USING // ADL of std functions T sigma = (x - a) / a; @@ -435,7 +456,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(-0.333333333L), static_cast(0.0833333333L), static_cast(-0.0148148148L), @@ -446,7 +467,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(-0.00185185185L), static_cast(-0.00347222222L), static_cast(0.00264550265L), @@ -455,7 +476,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(0.00413359788L), static_cast(-0.00268132716L), static_cast(0.000771604938L), @@ -467,7 +488,18 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant) + { + result += ::erfcf(::sqrtf(y)) / 2; + } + else + { + result += ::erfc(::sqrt(y)) / 2; + } + #else result += boost::math::erfc(sqrt(y), pol) / 2; + #endif return result; } @@ -478,8 +510,10 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant -T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant const *) +BOOST_MATH_GPU_ENABLED T igamma_temme_large(T a, T x, const Policy& pol, const boost::math::integral_constant&) { BOOST_MATH_STD_USING // ADL of std functions T sigma = (x - a) / a; @@ -491,7 +525,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant #include +#include +#include +#include #if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128) // @@ -28,15 +33,15 @@ namespace boost{ namespace math{ namespace detail{ // These need forward declaring to keep GCC happy: // template -T gamma_imp(T z, const Policy& pol, const Lanczos& l); +BOOST_MATH_GPU_ENABLED T gamma_imp(T z, const Policy& pol, const Lanczos& l); template -T gamma_imp(T z, const Policy& pol, const lanczos::undefined_lanczos& l); +BOOST_MATH_GPU_ENABLED T gamma_imp(T z, const Policy& pol, const lanczos::undefined_lanczos& l); // // lgamma for small arguments: // template -T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, const Policy& /* l */, const Lanczos&) +BOOST_MATH_GPU_ENABLED T lgamma_small_imp(T z, T zm1, T zm2, const boost::math::integral_constant&, const Policy& /* l */, const Lanczos&) { // This version uses rational approximations for small // values of z accurate enough for 64-bit mantissas @@ -87,7 +92,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, co // At long double: Max error found: 1.987e-21 // Maximum Deviation Found (approximation error): 5.900e-24 // - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.180355685678449379109e-1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.25126649619989678683e-1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.494103151567532234274e-1)), @@ -96,7 +101,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, co static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.541009869215204396339e-3)), 
static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.324588649825948492091e-4)) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.1e1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.196202987197795200688e1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.148019669424231326694e1)), @@ -107,7 +112,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, co static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.223352763208617092964e-6)) }; - static const float Y = 0.158963680267333984375e0f; + constexpr float Y = 0.158963680267333984375e0f; T r = zm2 * (z + 1); T R = tools::evaluate_polynomial(P, zm2); @@ -152,9 +157,9 @@ T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, co // Expected Error Term: 3.139e-021 // - static const float Y = 0.52815341949462890625f; + constexpr float Y = 0.52815341949462890625f; - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.490622454069039543534e-1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.969117530159521214579e-1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.414983358359495381969e0)), @@ -163,7 +168,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, co static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.240149820648571559892e-1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.100346687696279557415e-2)) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.1e1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.302349829846463038743e1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.348739585360723852576e1)), @@ -197,9 +202,9 @@ T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, co // Maximum Deviation Found: 2.151e-021 // Expected Error Term: 2.150e-021 // - static const float Y = 0.452017307281494140625f; + constexpr float Y = 0.452017307281494140625f; - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.292329721830270012337e-1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.144216267757192309184e0)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.142440390738631274135e0)), @@ -207,7 +212,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, co static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.850535976868336437746e-2)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.431171342679297331241e-3)) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.1e1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.150169356054485044494e1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.846973248876495016101e0)), @@ -224,8 +229,10 @@ T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, co } return result; } + +#ifndef BOOST_MATH_HAS_GPU_SUPPORT template -T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, const Policy& /* l */, const Lanczos&) +T lgamma_small_imp(T z, T zm1, T zm2, const boost::math::integral_constant&, const Policy& /* l */, const Lanczos&) { // // This version uses rational approximations for small @@ -482,7 +489,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, c return result; } template -T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, const Policy& pol, const Lanczos& l) +BOOST_MATH_GPU_ENABLED T lgamma_small_imp(T z, T zm1, T zm2, const boost::math::integral_constant&, const Policy& pol, const Lanczos& l) { // // No rational 
approximations are available because either @@ -526,6 +533,8 @@ T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, con return result; } +#endif // BOOST_MATH_HAS_GPU_SUPPORT + }}} // namespaces #endif // BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_LGAMMA_SMALL diff --git a/include/boost/math/special_functions/detail/round_fwd.hpp b/include/boost/math/special_functions/detail/round_fwd.hpp index c58459e36d..7d69f8b9c5 100644 --- a/include/boost/math/special_functions/detail/round_fwd.hpp +++ b/include/boost/math/special_functions/detail/round_fwd.hpp @@ -1,4 +1,5 @@ // Copyright John Maddock 2008. +// Copyright Matt Borland 2024 // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. @@ -21,53 +22,53 @@ namespace boost { template - typename tools::promote_args::type trunc(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED typename tools::promote_args::type trunc(const T& v, const Policy& pol); template - typename tools::promote_args::type trunc(const T& v); + BOOST_MATH_GPU_ENABLED typename tools::promote_args::type trunc(const T& v); template - int itrunc(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED int itrunc(const T& v, const Policy& pol); template - int itrunc(const T& v); + BOOST_MATH_GPU_ENABLED int itrunc(const T& v); template - long ltrunc(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED long ltrunc(const T& v, const Policy& pol); template - long ltrunc(const T& v); + BOOST_MATH_GPU_ENABLED long ltrunc(const T& v); template - long long lltrunc(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED long long lltrunc(const T& v, const Policy& pol); template - long long lltrunc(const T& v); + BOOST_MATH_GPU_ENABLED long long lltrunc(const T& v); template - typename tools::promote_args::type round(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED typename tools::promote_args::type round(const T& v, const Policy& pol); template - typename tools::promote_args::type round(const T& v); + BOOST_MATH_GPU_ENABLED typename tools::promote_args::type round(const T& v); template - int iround(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED int iround(const T& v, const Policy& pol); template - int iround(const T& v); + BOOST_MATH_GPU_ENABLED int iround(const T& v); template - long lround(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED long lround(const T& v, const Policy& pol); template - long lround(const T& v); + BOOST_MATH_GPU_ENABLED long lround(const T& v); template - long long llround(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED long long llround(const T& v, const Policy& pol); template - long long llround(const T& v); + BOOST_MATH_GPU_ENABLED long long llround(const T& v); template - T modf(const T& v, T* ipart, const Policy& pol); + BOOST_MATH_GPU_ENABLED T modf(const T& v, T* ipart, const Policy& pol); template - T modf(const T& v, T* ipart); + BOOST_MATH_GPU_ENABLED T modf(const T& v, T* ipart); template - T modf(const T& v, int* ipart, const Policy& pol); + BOOST_MATH_GPU_ENABLED T modf(const T& v, int* ipart, const Policy& pol); template - T modf(const T& v, int* ipart); + BOOST_MATH_GPU_ENABLED T modf(const T& v, int* ipart); template - T modf(const T& v, long* ipart, const Policy& pol); + BOOST_MATH_GPU_ENABLED T modf(const T& v, long* ipart, const Policy& pol); template - T modf(const T& v, long* ipart); + BOOST_MATH_GPU_ENABLED T modf(const T& v, long* ipart); template - T modf(const T& v, long long* ipart, const Policy& pol); + BOOST_MATH_GPU_ENABLED T modf(const T& v, 
   long long* ipart, const Policy& pol);
   template <class T>
-   T modf(const T& v, long long* ipart);
+   BOOST_MATH_GPU_ENABLED T modf(const T& v, long long* ipart);
 
   }
 }
diff --git a/include/boost/math/special_functions/detail/t_distribution_inv.hpp b/include/boost/math/special_functions/detail/t_distribution_inv.hpp
index 9209b6d405..79a29a0274 100644
--- a/include/boost/math/special_functions/detail/t_distribution_inv.hpp
+++ b/include/boost/math/special_functions/detail/t_distribution_inv.hpp
@@ -11,6 +11,9 @@
 #pragma once
 #endif
 
+#include
+#include
+#include
 #include
 #include
 #include
@@ -24,7 +27,7 @@ namespace boost{ namespace math{ namespace detail{
 // Communications of the ACM, 13(10): 619-620, Oct., 1970.
 //
 template <class T, class Policy>
-T inverse_students_t_hill(T ndf, T u, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T inverse_students_t_hill(T ndf, T u, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    BOOST_MATH_ASSERT(u <= 0.5);
@@ -74,7 +77,7 @@
 // Journal of Computational Finance, Vol 9 Issue 4, pp 37-73, Summer 2006
 //
 template <class T, class Policy>
-T inverse_students_t_tail_series(T df, T v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T inverse_students_t_tail_series(T df, T v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    // Tail series expansion, see section 6 of Shaw's paper.
@@ -125,7 +128,7 @@
 }
 
 template <class T, class Policy>
-T inverse_students_t_body_series(T df, T u, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T inverse_students_t_body_series(T df, T u, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    //
@@ -204,7 +207,7 @@
 }
 
 template <class T, class Policy>
-T inverse_students_t(T df, T u, T v, const Policy& pol, bool* pexact = nullptr)
+BOOST_MATH_GPU_ENABLED T inverse_students_t(T df, T u, T v, const Policy& pol, bool* pexact = nullptr)
 {
    //
    // df = number of degrees of freedom.
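// [Reviewer note] These detail:: routines back the public Student's t
// quantile.  A host-only usage sketch against the documented distribution
// API, just to anchor what is being inverted (not part of the device path):

#include <boost/math/distributions/students_t.hpp>
#include <cstdio>

int main()
{
   boost::math::students_t dist(5);                // df = 5 degrees of freedom
   double t = boost::math::quantile(dist, 0.975);  // inverse CDF
   std::printf("t_{0.975, df=5} = %g\n", t);       // ~2.5706
}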
@@ -220,7 +223,7 @@ T inverse_students_t(T df, T u, T v, const Policy& pol, bool* pexact = nullptr) if(u > v) { // function is symmetric, invert it: - std::swap(u, v); + BOOST_MATH_GPU_SAFE_SWAP(u, v); invert = true; } if((floor(df) == df) && (df < 20)) @@ -416,7 +419,7 @@ T inverse_students_t(T df, T u, T v, const Policy& pol, bool* pexact = nullptr) } template -inline T find_ibeta_inv_from_t_dist(T a, T p, T /*q*/, T* py, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T find_ibeta_inv_from_t_dist(T a, T p, T /*q*/, T* py, const Policy& pol) { T u = p / 2; T v = 1 - u; @@ -426,8 +429,21 @@ inline T find_ibeta_inv_from_t_dist(T a, T p, T /*q*/, T* py, const Policy& pol) return df / (df + t * t); } +// NVRTC requires this forward decl because there is a header cycle between here and ibeta_inverse.hpp +#ifdef BOOST_MATH_HAS_NVRTC + +} // Namespace detail + +template +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type + ibeta_inv(T1 a, T2 b, T3 p, T4* py, const Policy& pol); + +namespace detail { + +#endif + template -inline T fast_students_t_quantile_imp(T df, T p, const Policy& pol, const std::false_type*) +BOOST_MATH_GPU_ENABLED inline T fast_students_t_quantile_imp(T df, T p, const Policy& pol, const boost::math::false_type*) { BOOST_MATH_STD_USING // @@ -450,12 +466,12 @@ inline T fast_students_t_quantile_imp(T df, T p, const Policy& pol, const std::f } template -T fast_students_t_quantile_imp(T df, T p, const Policy& pol, const std::true_type*) +BOOST_MATH_GPU_ENABLED T fast_students_t_quantile_imp(T df, T p, const Policy& pol, const boost::math::true_type*) { BOOST_MATH_STD_USING bool invert = false; if((df < 2) && (floor(df) != df)) - return boost::math::detail::fast_students_t_quantile_imp(df, p, pol, static_cast(nullptr)); + return boost::math::detail::fast_students_t_quantile_imp(df, p, pol, static_cast(nullptr)); if(p > 0.5) { p = 1 - p; @@ -521,7 +537,7 @@ T fast_students_t_quantile_imp(T df, T p, const Policy& pol, const std::true_typ } template -inline T fast_students_t_quantile(T df, T p, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T fast_students_t_quantile(T df, T p, const Policy& pol) { typedef typename policies::evaluation::type value_type; typedef typename policies::normalise< @@ -531,12 +547,12 @@ inline T fast_students_t_quantile(T df, T p, const Policy& pol) policies::discrete_quantile<>, policies::assert_undefined<> >::type forwarding_policy; - typedef std::integral_constant::digits <= 53) + typedef boost::math::integral_constant::digits <= 53) && - (std::numeric_limits::is_specialized) + (boost::math::numeric_limits::is_specialized) && - (std::numeric_limits::radix == 2) + (boost::math::numeric_limits::radix == 2) > tag_type; return policies::checked_narrowing_cast(fast_students_t_quantile_imp(static_cast(df), static_cast(p), pol, static_cast(nullptr)), "boost::math::students_t_quantile<%1%>(%1%,%1%,%1%)"); } diff --git a/include/boost/math/special_functions/detail/unchecked_factorial.hpp b/include/boost/math/special_functions/detail/unchecked_factorial.hpp index b528a24fe9..92481f2c6e 100644 --- a/include/boost/math/special_functions/detail/unchecked_factorial.hpp +++ b/include/boost/math/special_functions/detail/unchecked_factorial.hpp @@ -10,19 +10,23 @@ #pragma once #endif -#ifdef _MSC_VER -#pragma warning(push) // Temporary until lexical cast fixed. 
-#pragma warning(disable: 4127 4701) -#endif -#include -#ifdef _MSC_VER -#pragma warning(pop) -#endif -#include +#include +#include +#include +#include #include -#include -#include -#include + +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +# ifdef _MSC_VER +# pragma warning(push) // Temporary until lexical cast fixed. +# pragma warning(disable: 4127 4701) +# endif +# include +# ifdef _MSC_VER +# pragma warning(pop) +# endif +#endif + #if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128) // @@ -46,11 +50,21 @@ struct max_factorial; template struct unchecked_factorial_data; +#ifdef BOOST_MATH_HAS_NVRTC + +// Need fwd decl +template +BOOST_MATH_GPU_ENABLED inline T unchecked_factorial(unsigned i); + +#endif + +#ifndef BOOST_MATH_HAS_GPU_SUPPORT + template struct unchecked_factorial_data { #ifdef BOOST_MATH_HAVE_CONSTEXPR_TABLES - static constexpr std::array factorials = { { + static constexpr boost::math::array factorials = { { 1.0F, 1.0F, 2.0F, @@ -88,15 +102,15 @@ struct unchecked_factorial_data 0.29523279903960414084761860964352e39F, }}; #else - static const std::array factorials; + static const boost::math::array factorials; #endif }; template #ifdef BOOST_MATH_HAVE_CONSTEXPR_TABLES - constexpr std::array unchecked_factorial_data::factorials; + constexpr boost::math::array unchecked_factorial_data::factorials; #else - const std::array unchecked_factorial_data::factorials = {{ + const boost::math::array unchecked_factorial_data::factorials = {{ 1.0F, 1.0F, 2.0F, @@ -137,22 +151,72 @@ template // Definitions: template <> -inline BOOST_MATH_CONSTEXPR_TABLE_FUNCTION float unchecked_factorial(unsigned i BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(float)) +BOOST_MATH_GPU_ENABLED inline BOOST_MATH_CONSTEXPR_TABLE_FUNCTION float unchecked_factorial(unsigned i BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(float)) { return unchecked_factorial_data::factorials[i]; } +#else + +template <> +BOOST_MATH_GPU_ENABLED inline BOOST_MATH_CONSTEXPR_TABLE_FUNCTION float unchecked_factorial(unsigned i BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(float)) +{ + constexpr float factorials[] = { + 1.0F, + 1.0F, + 2.0F, + 6.0F, + 24.0F, + 120.0F, + 720.0F, + 5040.0F, + 40320.0F, + 362880.0F, + 3628800.0F, + 39916800.0F, + 479001600.0F, + 6227020800.0F, + 87178291200.0F, + 1307674368000.0F, + 20922789888000.0F, + 355687428096000.0F, + 6402373705728000.0F, + 121645100408832000.0F, + 0.243290200817664e19F, + 0.5109094217170944e20F, + 0.112400072777760768e22F, + 0.2585201673888497664e23F, + 0.62044840173323943936e24F, + 0.15511210043330985984e26F, + 0.403291461126605635584e27F, + 0.10888869450418352160768e29F, + 0.304888344611713860501504e30F, + 0.8841761993739701954543616e31F, + 0.26525285981219105863630848e33F, + 0.822283865417792281772556288e34F, + 0.26313083693369353016721801216e36F, + 0.868331761881188649551819440128e37F, + 0.29523279903960414084761860964352e39F, + }; + + return factorials[i]; +} + +#endif + template <> struct max_factorial { static constexpr unsigned value = 34; }; +#ifndef BOOST_MATH_HAS_GPU_SUPPORT + template struct unchecked_factorial_data { #ifdef BOOST_MATH_HAVE_CONSTEXPR_TABLES - static constexpr std::array factorials = { { + static constexpr boost::math::array factorials = { { 1.0, 1.0, 2.0, @@ -326,15 +390,15 @@ struct unchecked_factorial_data 0.7257415615307998967396728211129263114717e307, }}; #else - static const std::array factorials; + static const boost::math::array factorials; #endif }; template #ifdef BOOST_MATH_HAVE_CONSTEXPR_TABLES - constexpr std::array 
unchecked_factorial_data::factorials; + constexpr boost::math::array unchecked_factorial_data::factorials; #else - const std::array unchecked_factorial_data::factorials = {{ + const boost::math::array unchecked_factorial_data::factorials = {{ 1.0, 1.0, 2.0, @@ -510,7 +574,7 @@ template #endif template <> -inline BOOST_MATH_CONSTEXPR_TABLE_FUNCTION double unchecked_factorial(unsigned i BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(double)) +BOOST_MATH_GPU_ENABLED inline BOOST_MATH_CONSTEXPR_TABLE_FUNCTION double unchecked_factorial(unsigned i BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(double)) { return unchecked_factorial_data::factorials[i]; } @@ -521,11 +585,67 @@ struct max_factorial static constexpr unsigned value = 170; }; +#else + +template <> +BOOST_MATH_GPU_ENABLED inline BOOST_MATH_CONSTEXPR_TABLE_FUNCTION double unchecked_factorial(unsigned i BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(double)) +{ + constexpr double factorials[] = { + 1, + 1, + 2, + 6, + 24, + 120, + 720, + 5040, + 40320, + 362880.0, + 3628800.0, + 39916800.0, + 479001600.0, + 6227020800.0, + 87178291200.0, + 1307674368000.0, + 20922789888000.0, + 355687428096000.0, + 6402373705728000.0, + 121645100408832000.0, + 0.243290200817664e19, + 0.5109094217170944e20, + 0.112400072777760768e22, + 0.2585201673888497664e23, + 0.62044840173323943936e24, + 0.15511210043330985984e26, + 0.403291461126605635584e27, + 0.10888869450418352160768e29, + 0.304888344611713860501504e30, + 0.8841761993739701954543616e31, + 0.26525285981219105863630848e33, + 0.822283865417792281772556288e34, + 0.26313083693369353016721801216e36, + 0.868331761881188649551819440128e37, + 0.29523279903960414084761860964352e39, + }; + + return factorials[i]; +} + +template <> +struct max_factorial +{ + static constexpr unsigned value = 34; +}; + +#endif + +#ifndef BOOST_MATH_HAS_GPU_SUPPORT + template struct unchecked_factorial_data { #ifdef BOOST_MATH_HAVE_CONSTEXPR_TABLES - static constexpr std::array factorials = { { + static constexpr boost::math::array factorials = { { 1L, 1L, 2L, @@ -699,15 +819,15 @@ struct unchecked_factorial_data 0.7257415615307998967396728211129263114717e307L, }}; #else - static const std::array factorials; + static const boost::math::array factorials; #endif }; template #ifdef BOOST_MATH_HAVE_CONSTEXPR_TABLES - constexpr std::array unchecked_factorial_data::factorials; + constexpr boost::math::array unchecked_factorial_data::factorials; #else - const std::array unchecked_factorial_data::factorials = {{ + const boost::math::array unchecked_factorial_data::factorials = {{ 1L, 1L, 2L, @@ -900,7 +1020,7 @@ template struct unchecked_factorial_data { #ifdef BOOST_MATH_HAVE_CONSTEXPR_TABLES - static constexpr std::array factorials = { { + static constexpr boost::math::array factorials = { { 1, 1, 2, @@ -1074,15 +1194,15 @@ struct unchecked_factorial_data 0.7257415615307998967396728211129263114717e307Q, } }; #else - static const std::array factorials; + static const boost::math::array factorials; #endif }; template #ifdef BOOST_MATH_HAVE_CONSTEXPR_TABLES -constexpr std::array unchecked_factorial_data::factorials; +constexpr boost::math::array unchecked_factorial_data::factorials; #else -const std::array unchecked_factorial_data::factorials = { { +const boost::math::array unchecked_factorial_data::factorials = { { 1, 1, 2, @@ -1294,7 +1414,7 @@ const typename unchecked_factorial_initializer::init unchecked_factorial_init template -inline T unchecked_factorial_imp(unsigned i, const std::integral_constant&) +inline T 
unchecked_factorial_imp(unsigned i, const boost::math::integral_constant&) { // // If you're foolish enough to instantiate factorial @@ -1308,10 +1428,10 @@ inline T unchecked_factorial_imp(unsigned i, const std::integral_constant(factorial(n)); // See factorial documentation for more detail. // - static_assert(!std::is_integral::value && !std::numeric_limits::is_integer, "Type T must not be an integral type"); + static_assert(!boost::math::is_integral::value && !boost::math::numeric_limits::is_integer, "Type T must not be an integral type"); // We rely on C++11 thread safe initialization here: - static const std::array factorials = {{ + static const boost::math::array factorials = {{ T(boost::math::tools::convert_from_string("1")), T(boost::math::tools::convert_from_string("1")), T(boost::math::tools::convert_from_string("2")), @@ -1419,7 +1539,7 @@ inline T unchecked_factorial_imp(unsigned i, const std::integral_constant -inline T unchecked_factorial_imp(unsigned i, const std::integral_constant&) +inline T unchecked_factorial_imp(unsigned i, const boost::math::integral_constant&) { // // If you're foolish enough to instantiate factorial @@ -1433,7 +1553,7 @@ inline T unchecked_factorial_imp(unsigned i, const std::integral_constant(factorial(n)); // See factorial documentation for more detail. // - static_assert(!std::is_integral::value && !std::numeric_limits::is_integer, "Type T must not be an integral type"); + static_assert(!boost::math::is_integral::value && !boost::math::numeric_limits::is_integer, "Type T must not be an integral type"); static const char* const factorial_strings[] = { "1", @@ -1556,42 +1676,48 @@ inline T unchecked_factorial_imp(unsigned i, const std::integral_constant -inline T unchecked_factorial_imp(unsigned i, const std::integral_constant::digits>&) +BOOST_MATH_GPU_ENABLED inline T unchecked_factorial_imp(unsigned i, const boost::math::integral_constant::digits>&) { return unchecked_factorial(i); } template -inline T unchecked_factorial_imp(unsigned i, const std::integral_constant::digits>&) +BOOST_MATH_GPU_ENABLED inline T unchecked_factorial_imp(unsigned i, const boost::math::integral_constant::digits>&) { return unchecked_factorial(i); } +#ifndef BOOST_MATH_HAS_GPU_SUPPORT + #if DBL_MANT_DIG != LDBL_MANT_DIG template -inline T unchecked_factorial_imp(unsigned i, const std::integral_constant&) +inline T unchecked_factorial_imp(unsigned i, const boost::math::integral_constant&) { return unchecked_factorial(i); } #endif #ifdef BOOST_MATH_USE_FLOAT128 template -inline T unchecked_factorial_imp(unsigned i, const std::integral_constant&) +inline T unchecked_factorial_imp(unsigned i, const boost::math::integral_constant&) { return unchecked_factorial(i); } #endif +#endif // BOOST_MATH_HAS_GPU_SUPPORT + template -inline T unchecked_factorial(unsigned i) +BOOST_MATH_GPU_ENABLED inline T unchecked_factorial(unsigned i) { typedef typename boost::math::policies::precision >::type tag_type; return unchecked_factorial_imp(i, tag_type()); } #ifdef BOOST_MATH_USE_FLOAT128 -#define BOOST_MATH_DETAIL_FLOAT128_MAX_FACTORIAL : std::numeric_limits::digits == 113 ? max_factorial::value +#define BOOST_MATH_DETAIL_FLOAT128_MAX_FACTORIAL : boost::math::numeric_limits::digits == 113 ? max_factorial::value #else #define BOOST_MATH_DETAIL_FLOAT128_MAX_FACTORIAL #endif @@ -1600,10 +1726,12 @@ template struct max_factorial { static constexpr unsigned value = - std::numeric_limits::digits == std::numeric_limits::digits ? 
max_factorial::value - : std::numeric_limits::digits == std::numeric_limits::digits ? max_factorial::value - : std::numeric_limits::digits == std::numeric_limits::digits ? max_factorial::value + boost::math::numeric_limits::digits == boost::math::numeric_limits::digits ? max_factorial::value + : boost::math::numeric_limits::digits == boost::math::numeric_limits::digits ? max_factorial::value + #ifndef BOOST_MATH_GPU_ENABLED + : boost::math::numeric_limits::digits == boost::math::numeric_limits::digits ? max_factorial::value BOOST_MATH_DETAIL_FLOAT128_MAX_FACTORIAL + #endif : 100; }; diff --git a/include/boost/math/special_functions/digamma.hpp b/include/boost/math/special_functions/digamma.hpp index 3922de7d25..382ad0d6b9 100644 --- a/include/boost/math/special_functions/digamma.hpp +++ b/include/boost/math/special_functions/digamma.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -12,13 +13,21 @@ #pragma warning(disable:4702) // Unreachable code (release mode only warning) #endif -#include +#include +#include #include -#include #include +#include +#include +#include + +#ifndef BOOST_MATH_HAS_NVRTC +#include +#include #include #include #include +#endif #if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128) // @@ -37,11 +46,11 @@ namespace detail{ // Begin by defining the smallest value for which it is safe to // use the asymptotic expansion for digamma: // -inline unsigned digamma_large_lim(const std::integral_constant*) +BOOST_MATH_GPU_ENABLED inline unsigned digamma_large_lim(const boost::math::integral_constant*) { return 20; } -inline unsigned digamma_large_lim(const std::integral_constant*) +BOOST_MATH_GPU_ENABLED inline unsigned digamma_large_lim(const boost::math::integral_constant*) { return 20; } -inline unsigned digamma_large_lim(const void*) +BOOST_MATH_GPU_ENABLED inline unsigned digamma_large_lim(const void*) { return 10; } // // Implementations of the asymptotic expansion come next, @@ -53,8 +62,10 @@ inline unsigned digamma_large_lim(const void*) // // This first one gives 34-digit precision for x >= 20: // + +#ifndef BOOST_MATH_HAS_NVRTC template -inline T digamma_imp_large(T x, const std::integral_constant*) +inline T digamma_imp_large(T x, const boost::math::integral_constant*) { BOOST_MATH_STD_USING // ADL of std functions. static const T P[] = { @@ -87,7 +98,7 @@ inline T digamma_imp_large(T x, const std::integral_constant*) // 19-digit precision for x >= 10: // template -inline T digamma_imp_large(T x, const std::integral_constant*) +inline T digamma_imp_large(T x, const boost::math::integral_constant*) { BOOST_MATH_STD_USING // ADL of std functions. static const T P[] = { @@ -110,14 +121,15 @@ inline T digamma_imp_large(T x, const std::integral_constant*) result -= z * tools::evaluate_polynomial(P, z); return result; } +#endif // // 17-digit precision for x >= 10: // template -inline T digamma_imp_large(T x, const std::integral_constant*) +BOOST_MATH_GPU_ENABLED inline T digamma_imp_large(T x, const boost::math::integral_constant*) { BOOST_MATH_STD_USING // ADL of std functions. 
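// [Reviewer note] The digamma_imp_large overloads implement the asymptotic
// expansion psi(x) ~ ln(x) - 1/(2x) - sum_{n>=1} B_{2n}/(2n * x^{2n}); the
// P[] tables that follow are 1/12, -1/120, 1/252, ... in powers of
// z = 1/x^2.  A self-contained three-term sketch of the same series
// (illustrative only, not the library routine):

#include <cmath>
#include <cstdio>

float digamma_large_sketch(float x)   // intended for x >= ~10
{
   const float z = 1 / (x * x);
   float result = std::log(x) - 0.5f / x;
   result -= z * (1.0f / 12 + z * (-1.0f / 120 + z * (1.0f / 252)));
   return result;
}

int main()
{
   std::printf("%.7g\n", digamma_large_sketch(10.0f)); // psi(10) ~ 2.2517526
}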
- static const T P[] = { + BOOST_MATH_STATIC const T P[] = { 0.083333333333333333333333333333333333333333333333333, -0.0083333333333333333333333333333333333333333333333333, 0.003968253968253968253968253968253968253968253968254, @@ -138,10 +150,10 @@ inline T digamma_imp_large(T x, const std::integral_constant*) // 9-digit precision for x >= 10: // template -inline T digamma_imp_large(T x, const std::integral_constant*) +BOOST_MATH_GPU_ENABLED inline T digamma_imp_large(T x, const boost::math::integral_constant*) { BOOST_MATH_STD_USING // ADL of std functions. - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { 0.083333333333333333333333333333333333333333333333333f, -0.0083333333333333333333333333333333333333333333333333f, 0.003968253968253968253968253968253968253968253968254f @@ -153,6 +165,8 @@ inline T digamma_imp_large(T x, const std::integral_constant*) result -= z * tools::evaluate_polynomial(P, z); return result; } + +#ifndef BOOST_MATH_HAS_NVRTC // // Fully generic asymptotic expansion in terms of Bernoulli numbers, see: // http://functions.wolfram.com/06.14.06.0012.01 @@ -177,7 +191,7 @@ struct digamma_series_func }; template -inline T digamma_imp_large(T x, const Policy& pol, const std::integral_constant*) +inline T digamma_imp_large(T x, const Policy& pol, const boost::math::integral_constant*) { BOOST_MATH_STD_USING digamma_series_func s(x); @@ -194,7 +208,7 @@ inline T digamma_imp_large(T x, const Policy& pol, const std::integral_constant< // 35-digit precision: // template -T digamma_imp_1_2(T x, const std::integral_constant*) +T digamma_imp_1_2(T x, const boost::math::integral_constant*) { // // Now the approximation, we use the form: @@ -258,7 +272,7 @@ T digamma_imp_1_2(T x, const std::integral_constant*) // 19-digit precision: // template -T digamma_imp_1_2(T x, const std::integral_constant*) +T digamma_imp_1_2(T x, const boost::math::integral_constant*) { // // Now the approximation, we use the form: @@ -306,11 +320,13 @@ T digamma_imp_1_2(T x, const std::integral_constant*) return result; } + +#endif // // 18-digit precision: // template -T digamma_imp_1_2(T x, const std::integral_constant*) +BOOST_MATH_GPU_ENABLED T digamma_imp_1_2(T x, const boost::math::integral_constant*) { // // Now the approximation, we use the form: @@ -325,13 +341,13 @@ T digamma_imp_1_2(T x, const std::integral_constant*) // At double precision, max error found: 2.452e-17 // // LCOV_EXCL_START - static const float Y = 0.99558162689208984F; + BOOST_MATH_STATIC const float Y = 0.99558162689208984F; - static const T root1 = T(1569415565) / 1073741824uL; - static const T root2 = (T(381566830) / 1073741824uL) / 1073741824uL; - static const T root3 = BOOST_MATH_BIG_CONSTANT(T, 53, 0.9016312093258695918615325266959189453125e-19); + BOOST_MATH_STATIC const T root1 = T(1569415565) / 1073741824uL; + BOOST_MATH_STATIC const T root2 = (T(381566830) / 1073741824uL) / 1073741824uL; + BOOST_MATH_STATIC const T root3 = BOOST_MATH_BIG_CONSTANT(T, 53, 0.9016312093258695918615325266959189453125e-19); - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 53, 0.25479851061131551), BOOST_MATH_BIG_CONSTANT(T, 53, -0.32555031186804491), BOOST_MATH_BIG_CONSTANT(T, 53, -0.65031853770896507), @@ -339,7 +355,7 @@ T digamma_imp_1_2(T x, const std::integral_constant*) BOOST_MATH_BIG_CONSTANT(T, 53, -0.045251321448739056), BOOST_MATH_BIG_CONSTANT(T, 53, -0.0020713321167745952) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), 
BOOST_MATH_BIG_CONSTANT(T, 53, 2.0767117023730469), BOOST_MATH_BIG_CONSTANT(T, 53, 1.4606242909763515), @@ -361,7 +377,7 @@ T digamma_imp_1_2(T x, const std::integral_constant*) // 9-digit precision: // template -inline T digamma_imp_1_2(T x, const std::integral_constant*) +BOOST_MATH_GPU_ENABLED inline T digamma_imp_1_2(T x, const boost::math::integral_constant*) { // // Now the approximation, we use the form: @@ -376,16 +392,16 @@ inline T digamma_imp_1_2(T x, const std::integral_constant*) // At float precision, max error found: 2.008725e-008 // // LCOV_EXCL_START - static const float Y = 0.99558162689208984f; - static const T root = 1532632.0f / 1048576; - static const T root_minor = static_cast(0.3700660185912626595423257213284682051735604e-6L); - static const T P[] = { + BOOST_MATH_STATIC const float Y = 0.99558162689208984f; + BOOST_MATH_STATIC const T root = 1532632.0f / 1048576; + BOOST_MATH_STATIC const T root_minor = static_cast(0.3700660185912626595423257213284682051735604e-6L); + BOOST_MATH_STATIC const T P[] = { 0.25479851023250261e0f, -0.44981331915268368e0f, -0.43916936919946835e0f, -0.61041765350579073e-1f }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { 0.1e1f, 0.15890202430554952e1f, 0.65341249856146947e0f, @@ -401,7 +417,7 @@ inline T digamma_imp_1_2(T x, const std::integral_constant*) } template -T digamma_imp(T x, const Tag* t, const Policy& pol) +BOOST_MATH_GPU_ENABLED T digamma_imp(T x, const Tag* t, const Policy& pol) { // // This handles reflection of negative arguments, and all our @@ -439,11 +455,13 @@ T digamma_imp(T x, const Tag* t, const Policy& pol) // If we're above the lower-limit for the // asymptotic expansion then use it: // + #ifndef BOOST_MATH_HAS_NVRTC if(x >= digamma_large_lim(t)) { result += digamma_imp_large(x, t); } else + #endif { // // If x > 2 reduce to the interval [1,2]: @@ -466,8 +484,10 @@ T digamma_imp(T x, const Tag* t, const Policy& pol) return result; } +#ifndef BOOST_MATH_HAS_NVRTC + template -T digamma_imp(T x, const std::integral_constant* t, const Policy& pol) +T digamma_imp(T x, const boost::math::integral_constant* t, const Policy& pol) { // // This handles reflection of negative arguments, and all our @@ -564,16 +584,18 @@ T digamma_imp(T x, const std::integral_constant* t, const Policy& pol) // LCOV_EXCL_STOP } +#endif + } // namespace detail template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type digamma(T x, const Policy&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; typedef typename policies::precision::type precision_type; - typedef std::integral_constant 113) ? 0 : precision_type::value <= 24 ? 24 : precision_type::value <= 53 ? 53 : @@ -592,7 +614,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type digamma(T x) { return digamma(x, policies::policy<>()); diff --git a/include/boost/math/special_functions/ellint_1.hpp b/include/boost/math/special_functions/ellint_1.hpp index dfc1815f7f..96c7c9e9b9 100644 --- a/include/boost/math/special_functions/ellint_1.hpp +++ b/include/boost/math/special_functions/ellint_1.hpp @@ -1,5 +1,6 @@ // Copyright (c) 2006 Xiaogang Zhang // Copyright (c) 2006 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -18,6 +19,8 @@ #pragma once #endif +#include +#include #include #include #include @@ -31,28 +34,28 @@ namespace boost { namespace math { template -typename tools::promote_args::type ellint_1(T1 k, T2 phi, const Policy& pol); +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_1(T1 k, T2 phi, const Policy& pol); namespace detail{ template -T ellint_k_imp(T k, const Policy& pol, std::integral_constant const&); +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, boost::math::integral_constant const&); template -T ellint_k_imp(T k, const Policy& pol, std::integral_constant const&); +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, boost::math::integral_constant const&); template -T ellint_k_imp(T k, const Policy& pol, std::integral_constant const&); +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, boost::math::integral_constant const&); template -T ellint_k_imp(T k, const Policy& pol, T one_minus_k2); +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, T one_minus_k2); // Elliptic integral (Legendre form) of the first kind template -T ellint_f_imp(T phi, T k, const Policy& pol, T one_minus_k2) +BOOST_MATH_GPU_ENABLED T ellint_f_imp(T phi, T k, const Policy& pol, T one_minus_k2) { BOOST_MATH_STD_USING using namespace boost::math::tools; using namespace boost::math::constants; - static const char* function = "boost::math::ellint_f<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::ellint_f<%1%>(%1%,%1%)"; BOOST_MATH_INSTRUMENT_VARIABLE(phi); BOOST_MATH_INSTRUMENT_VARIABLE(k); BOOST_MATH_INSTRUMENT_VARIABLE(function); @@ -149,19 +152,19 @@ T ellint_f_imp(T phi, T k, const Policy& pol, T one_minus_k2) } template -inline T ellint_f_imp(T phi, T k, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T ellint_f_imp(T phi, T k, const Policy& pol) { return ellint_f_imp(phi, k, pol, T(1 - k * k)); } // Complete elliptic integral (Legendre form) of the first kind template -T ellint_k_imp(T k, const Policy& pol, T one_minus_k2) +BOOST_MATH_GPU_ENABLED T ellint_k_imp(T k, const Policy& pol, T one_minus_k2) { BOOST_MATH_STD_USING using namespace boost::math::tools; - static const char* function = "boost::math::ellint_k<%1%>(%1%)"; + constexpr auto function = "boost::math::ellint_k<%1%>(%1%)"; if (abs(k) > 1) { @@ -179,7 +182,7 @@ T ellint_k_imp(T k, const Policy& pol, T one_minus_k2) return value; } template -inline T ellint_k_imp(T k, const Policy& pol, std::integral_constant const&) +BOOST_MATH_GPU_ENABLED inline T ellint_k_imp(T k, const Policy& pol, boost::math::integral_constant const&) { return ellint_k_imp(k, pol, T(1 - k * k)); } @@ -201,9 +204,9 @@ inline T ellint_k_imp(T k, const Policy& pol, std::integral_constant con // archived in the code below), but was found to have slightly higher error rates. 
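For reference, the m > 0.9 cases that the polynomial branches below hand off to are evaluated through Carlson's symmetric form, via the standard identity K(k) = R_F(0, 1 - k^2, 1). A minimal host-side sketch of that relation using the public API (the cross-check against C++17's std::comp_ellint_1 is purely illustrative):

    #include <boost/math/special_functions/ellint_rf.hpp>
    #include <cmath>
    #include <iostream>

    int main()
    {
        double k = 0.99; // m = k * k = 0.9801, above the 0.9 cutoff
        // Complete elliptic integral of the first kind in Carlson form:
        double K = boost::math::ellint_rf(0.0, 1.0 - k * k, 1.0);
        std::cout << K << " vs " << std::comp_ellint_1(k) << '\n';
    }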
// template -BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, std::integral_constant const&) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, boost::math::integral_constant const&) { - using std::abs; + BOOST_MATH_STD_USING using namespace boost::math::tools; T m = k * k; @@ -454,7 +457,7 @@ BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, std::integral_cons // This handles all cases where m > 0.9, // including all error handling: // - return ellint_k_imp(k, pol, std::integral_constant()); + return ellint_k_imp(k, pol, boost::math::integral_constant()); #if 0 else { @@ -474,9 +477,9 @@ BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, std::integral_cons } } template -BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, std::integral_constant const&) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, boost::math::integral_constant const&) { - using std::abs; + BOOST_MATH_STD_USING using namespace boost::math::tools; T m = k * k; @@ -755,44 +758,37 @@ BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, std::integral_cons // All cases where m > 0.9 // including all error handling: // - return ellint_k_imp(k, pol, std::integral_constant()); + return ellint_k_imp(k, pol, boost::math::integral_constant()); } } template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_1(T k, const Policy& pol, const std::true_type&) +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_1(T k, const Policy& pol, const boost::math::true_type&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; - typedef std::integral_constant::value && std::numeric_limits::digits && (std::numeric_limits::digits <= 54) ? 0 : - std::is_floating_point::value && std::numeric_limits::digits && (std::numeric_limits::digits <= 64) ? 1 : 2 + boost::math::is_floating_point::value && boost::math::numeric_limits::digits && (boost::math::numeric_limits::digits <= 54) ? 0 : + boost::math::is_floating_point::value && boost::math::numeric_limits::digits && (boost::math::numeric_limits::digits <= 64) ? 1 : 2 #endif > precision_tag_type; return policies::checked_narrowing_cast(detail::ellint_k_imp(static_cast(k), pol, precision_tag_type()), "boost::math::ellint_1<%1%>(%1%)"); } template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_1(T1 k, T2 phi, const std::false_type&) +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_1(T1 k, T2 phi, const boost::math::false_type&) { return boost::math::ellint_1(k, phi, policies::policy<>()); } -} - -// Complete elliptic integral (Legendre form) of the first kind -template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_1(T k) -{ - return ellint_1(k, policies::policy<>()); -} +} // namespace detail // Elliptic integral (Legendre form) of the first kind template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_1(T1 k, T2 phi, const Policy& pol) // LCOV_EXCL_LINE gcc misses this but sees the function body, strange! +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_1(T1 k, T2 phi, const Policy& pol) // LCOV_EXCL_LINE gcc misses this but sees the function body, strange! 
{ typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -800,12 +796,19 @@ BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_1(T1 k, } template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_1(T1 k, T2 phi) +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_1(T1 k, T2 phi) { typedef typename policies::is_policy::type tag_type; return detail::ellint_1(k, phi, tag_type()); } +// Complete elliptic integral (Legendre form) of the first kind +template +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_1(T k) +{ + return ellint_1(k, policies::policy<>()); +} + }} // namespaces #endif // BOOST_MATH_ELLINT_1_HPP diff --git a/include/boost/math/special_functions/ellint_2.hpp b/include/boost/math/special_functions/ellint_2.hpp index b09cdd490e..0cc1fa0944 100644 --- a/include/boost/math/special_functions/ellint_2.hpp +++ b/include/boost/math/special_functions/ellint_2.hpp @@ -1,5 +1,6 @@ // Copyright (c) 2006 Xiaogang Zhang // Copyright (c) 2006 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -18,6 +19,9 @@ #pragma once #endif +#include +#include +#include #include #include #include @@ -33,20 +37,20 @@ namespace boost { namespace math { template -typename tools::promote_args::type ellint_2(T1 k, T2 phi, const Policy& pol); +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_2(T1 k, T2 phi, const Policy& pol); namespace detail{ template -T ellint_e_imp(T k, const Policy& pol, const std::integral_constant&); +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, const boost::math::integral_constant&); template -T ellint_e_imp(T k, const Policy& pol, const std::integral_constant&); +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, const boost::math::integral_constant&); template -T ellint_e_imp(T k, const Policy& pol, const std::integral_constant&); +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, const boost::math::integral_constant&); // Elliptic integral (Legendre form) of the second kind template -T ellint_e_imp(T phi, T k, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ellint_e_imp(T phi, T k, const Policy& pol) { BOOST_MATH_STD_USING using namespace boost::math::tools; @@ -71,9 +75,9 @@ T ellint_e_imp(T phi, T k, const Policy& pol) } else if(phi > 1 / tools::epsilon()) { - typedef std::integral_constant::value&& std::numeric_limits::digits && (std::numeric_limits::digits <= 54) ? 0 : - std::is_floating_point::value && std::numeric_limits::digits && (std::numeric_limits::digits <= 64) ? 1 : 2 + typedef boost::math::integral_constant::value&& boost::math::numeric_limits::digits && (boost::math::numeric_limits::digits <= 54) ? 0 : + boost::math::is_floating_point::value && boost::math::numeric_limits::digits && (boost::math::numeric_limits::digits <= 64) ? 1 : 2 > precision_tag_type; // Phi is so large that phi%pi is necessarily zero (or garbage), // just return the second part of the duplication formula: @@ -138,9 +142,9 @@ T ellint_e_imp(T phi, T k, const Policy& pol) } if (m != 0) { - typedef std::integral_constant::value&& std::numeric_limits::digits && (std::numeric_limits::digits <= 54) ? 
0 : - std::is_floating_point::value && std::numeric_limits::digits && (std::numeric_limits::digits <= 64) ? 1 : 2 + typedef boost::math::integral_constant::value&& boost::math::numeric_limits::digits && (boost::math::numeric_limits::digits <= 54) ? 0 : + boost::math::is_floating_point::value && boost::math::numeric_limits::digits && (boost::math::numeric_limits::digits <= 64) ? 1 : 2 > precision_tag_type; result += m * ellint_e_imp(k, pol, precision_tag_type()); } @@ -150,7 +154,7 @@ T ellint_e_imp(T phi, T k, const Policy& pol) // Complete elliptic integral (Legendre form) of the second kind template -T ellint_e_imp(T k, const Policy& pol, std::integral_constant const&) +BOOST_MATH_GPU_ENABLED T ellint_e_imp(T k, const Policy& pol, boost::math::integral_constant const&) { BOOST_MATH_STD_USING using namespace boost::math::tools; @@ -188,9 +192,9 @@ T ellint_e_imp(T k, const Policy& pol, std::integral_constant const&) // existing routines. // template -BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, std::integral_constant const&) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, boost::math::integral_constant const&) { - using std::abs; + BOOST_MATH_STD_USING using namespace boost::math::tools; T m = k * k; @@ -423,13 +427,13 @@ BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, std::integral_cons // All cases where m > 0.9 // including all error handling: // - return ellint_e_imp(k, pol, std::integral_constant()); + return ellint_e_imp(k, pol, boost::math::integral_constant()); } } template -BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, std::integral_constant const&) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, boost::math::integral_constant const&) { - using std::abs; + BOOST_MATH_STD_USING using namespace boost::math::tools; T m = k * k; @@ -696,54 +700,56 @@ BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, std::integral_cons // All cases where m > 0.9 // including all error handling: // - return ellint_e_imp(k, pol, std::integral_constant()); + return ellint_e_imp(k, pol, boost::math::integral_constant()); } } template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_2(T k, const Policy& pol, const std::true_type&) +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_2(T k, const Policy& pol, const boost::math::true_type&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; - typedef std::integral_constant::value&& std::numeric_limits::digits && (std::numeric_limits::digits <= 54) ? 0 : - std::is_floating_point::value && std::numeric_limits::digits && (std::numeric_limits::digits <= 64) ? 1 : 2 + typedef boost::math::integral_constant::value&& boost::math::numeric_limits::digits && (boost::math::numeric_limits::digits <= 54) ? 0 : + boost::math::is_floating_point::value && boost::math::numeric_limits::digits && (boost::math::numeric_limits::digits <= 64) ? 
1 : 2 > precision_tag_type; return policies::checked_narrowing_cast(detail::ellint_e_imp(static_cast(k), pol, precision_tag_type()), "boost::math::ellint_2<%1%>(%1%)"); } // Elliptic integral (Legendre form) of the second kind template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_2(T1 k, T2 phi, const std::false_type&) +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_2(T1 k, T2 phi, const boost::math::false_type&) { return boost::math::ellint_2(k, phi, policies::policy<>()); } } // detail -// Complete elliptic integral (Legendre form) of the second kind -template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_2(T k) -{ - return ellint_2(k, policies::policy<>()); -} - // Elliptic integral (Legendre form) of the second kind template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_2(T1 k, T2 phi) +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_2(T1 k, T2 phi) { typedef typename policies::is_policy::type tag_type; return detail::ellint_2(k, phi, tag_type()); } template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_2(T1 k, T2 phi, const Policy& pol) // LCOV_EXCL_LINE gcc misses this but sees the function body, strange! +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_2(T1 k, T2 phi, const Policy& pol) // LCOV_EXCL_LINE gcc misses this but sees the function body, strange! { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; return policies::checked_narrowing_cast(detail::ellint_e_imp(static_cast(phi), static_cast(k), pol), "boost::math::ellint_2<%1%>(%1%,%1%)"); } + +// Complete elliptic integral (Legendre form) of the second kind +template +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_2(T k) +{ + return ellint_2(k, policies::policy<>()); +} + + }} // namespaces #endif // BOOST_MATH_ELLINT_2_HPP diff --git a/include/boost/math/special_functions/ellint_3.hpp b/include/boost/math/special_functions/ellint_3.hpp index 33acc545dc..b8df7e2645 100644 --- a/include/boost/math/special_functions/ellint_3.hpp +++ b/include/boost/math/special_functions/ellint_3.hpp @@ -18,6 +18,8 @@ #pragma once #endif +#include +#include #include #include #include @@ -38,16 +40,16 @@ namespace boost { namespace math { namespace detail{ template -T ellint_pi_imp(T v, T k, T vc, const Policy& pol); +BOOST_MATH_CUDA_ENABLED T ellint_pi_imp(T v, T k, T vc, const Policy& pol); // Elliptic integral (Legendre form) of the third kind template -T ellint_pi_imp(T v, T phi, T k, T vc, const Policy& pol) +BOOST_MATH_CUDA_ENABLED T ellint_pi_imp(T v, T phi, T k, T vc, const Policy& pol) { // Note vc = 1-v presumably without cancellation error. 
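A small illustration (the values are hypothetical, chosen only to show the failure mode) of why vc is taken as a separate parameter rather than recomputed: when the caller derives v = 1 - t from some tiny t, forming 1 - v from the rounded v cancels away every digit of t, whereas passing vc = t directly keeps full precision:

    #include <cstdio>

    int main()
    {
        double t  = 1e-18;   // small quantity known exactly to the caller
        double v  = 1.0 - t; // rounds to exactly 1.0 in double precision
        double vc = 1.0 - v; // == 0.0: all information about t is lost
        std::printf("recomputed 1 - v = %g, exact vc = %g\n", vc, t);
    }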
BOOST_MATH_STD_USING - static const char* function = "boost::math::ellint_3<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "boost::math::ellint_3<%1%>(%1%,%1%,%1%)"; T sphi = sin(fabs(phi)); @@ -270,13 +272,13 @@ T ellint_pi_imp(T v, T phi, T k, T vc, const Policy& pol) // Complete elliptic integral (Legendre form) of the third kind template -T ellint_pi_imp(T v, T k, T vc, const Policy& pol) +BOOST_MATH_CUDA_ENABLED T ellint_pi_imp(T v, T k, T vc, const Policy& pol) { // Note arg vc = 1-v, possibly without cancellation errors BOOST_MATH_STD_USING using namespace boost::math::tools; - static const char* function = "boost::math::ellint_pi<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::ellint_pi<%1%>(%1%,%1%)"; if (abs(k) >= 1) { @@ -318,13 +320,13 @@ T ellint_pi_imp(T v, T k, T vc, const Policy& pol) } template -inline typename tools::promote_args::type ellint_3(T1 k, T2 v, T3 phi, const std::false_type&) +BOOST_MATH_CUDA_ENABLED inline typename tools::promote_args::type ellint_3(T1 k, T2 v, T3 phi, const boost::math::false_type&) { return boost::math::ellint_3(k, v, phi, policies::policy<>()); } template -inline typename tools::promote_args::type ellint_3(T1 k, T2 v, const Policy& pol, const std::true_type&) +BOOST_MATH_CUDA_ENABLED inline typename tools::promote_args::type ellint_3(T1 k, T2 v, const Policy& pol, const boost::math::true_type&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -339,7 +341,7 @@ inline typename tools::promote_args::type ellint_3(T1 k, T2 v, const Pol } // namespace detail template -inline typename tools::promote_args::type ellint_3(T1 k, T2 v, T3 phi, const Policy&) +BOOST_MATH_CUDA_ENABLED inline typename tools::promote_args::type ellint_3(T1 k, T2 v, T3 phi, const Policy&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -354,14 +356,14 @@ inline typename tools::promote_args::type ellint_3(T1 k, T2 v, T3 ph } template -typename detail::ellint_3_result::type ellint_3(T1 k, T2 v, T3 phi) +BOOST_MATH_CUDA_ENABLED typename detail::ellint_3_result::type ellint_3(T1 k, T2 v, T3 phi) { typedef typename policies::is_policy::type tag_type; return detail::ellint_3(k, v, phi, tag_type()); } template -inline typename tools::promote_args::type ellint_3(T1 k, T2 v) +BOOST_MATH_CUDA_ENABLED inline typename tools::promote_args::type ellint_3(T1 k, T2 v) { return ellint_3(k, v, policies::policy<>()); } diff --git a/include/boost/math/special_functions/ellint_d.hpp b/include/boost/math/special_functions/ellint_d.hpp index da1e87ba3e..f5a8491f5a 100644 --- a/include/boost/math/special_functions/ellint_d.hpp +++ b/include/boost/math/special_functions/ellint_d.hpp @@ -1,5 +1,6 @@ // Copyright (c) 2006 Xiaogang Zhang // Copyright (c) 2006 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -18,6 +19,8 @@ #pragma once #endif +#include +#include #include #include #include @@ -33,16 +36,16 @@ namespace boost { namespace math { template -typename tools::promote_args::type ellint_d(T1 k, T2 phi, const Policy& pol); +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_d(T1 k, T2 phi, const Policy& pol); namespace detail{ template -T ellint_d_imp(T k, const Policy& pol); +BOOST_MATH_GPU_ENABLED T ellint_d_imp(T k, const Policy& pol); // Elliptic integral (Legendre form) of the second kind template -T ellint_d_imp(T phi, T k, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ellint_d_imp(T phi, T k, const Policy& pol) { BOOST_MATH_STD_USING using namespace boost::math::tools; @@ -113,7 +116,7 @@ T ellint_d_imp(T phi, T k, const Policy& pol) // Complete elliptic integral (Legendre form) of the second kind template -T ellint_d_imp(T k, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ellint_d_imp(T k, const Policy& pol) { BOOST_MATH_STD_USING using namespace boost::math::tools; @@ -135,7 +138,7 @@ T ellint_d_imp(T k, const Policy& pol) } template -inline typename tools::promote_args::type ellint_d(T k, const Policy& pol, const std::true_type&) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_d(T k, const Policy& pol, const boost::math::true_type&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -144,7 +147,7 @@ inline typename tools::promote_args::type ellint_d(T k, const Policy& pol, co // Elliptic integral (Legendre form) of the second kind template -inline typename tools::promote_args::type ellint_d(T1 k, T2 phi, const std::false_type&) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_d(T1 k, T2 phi, const boost::math::false_type&) { return boost::math::ellint_d(k, phi, policies::policy<>()); } @@ -153,21 +156,21 @@ inline typename tools::promote_args::type ellint_d(T1 k, T2 phi, const s // Complete elliptic integral (Legendre form) of the second kind template -inline typename tools::promote_args::type ellint_d(T k) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_d(T k) { return ellint_d(k, policies::policy<>()); } // Elliptic integral (Legendre form) of the second kind template -inline typename tools::promote_args::type ellint_d(T1 k, T2 phi) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_d(T1 k, T2 phi) { typedef typename policies::is_policy::type tag_type; return detail::ellint_d(k, phi, tag_type()); } template -inline typename tools::promote_args::type ellint_d(T1 k, T2 phi, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_d(T1 k, T2 phi, const Policy& pol) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; diff --git a/include/boost/math/special_functions/ellint_rc.hpp b/include/boost/math/special_functions/ellint_rc.hpp index 2f9a1f8cfb..ae3c6375e5 100644 --- a/include/boost/math/special_functions/ellint_rc.hpp +++ b/include/boost/math/special_functions/ellint_rc.hpp @@ -1,4 +1,5 @@ // Copyright (c) 2006 Xiaogang Zhang, 2015 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -18,12 +19,11 @@ #pragma once #endif -#include #include +#include #include #include #include -#include // Carlson's degenerate elliptic integral // R_C(x, y) = R_F(x, y, y) = 0.5 * \int_{0}^{\infty} (t+x)^{-1/2} (t+y)^{-1} dt @@ -32,11 +32,11 @@ namespace boost { namespace math { namespace detail{ template -T ellint_rc_imp(T x, T y, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ellint_rc_imp(T x, T y, const Policy& pol) { BOOST_MATH_STD_USING - static const char* function = "boost::math::ellint_rc<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::ellint_rc<%1%>(%1%,%1%)"; if(x < 0) { @@ -88,7 +88,7 @@ T ellint_rc_imp(T x, T y, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_rc(T1 x, T2 y, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -100,7 +100,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_rc(T1 x, T2 y) { return ellint_rc(x, y, policies::policy<>()); diff --git a/include/boost/math/special_functions/ellint_rd.hpp b/include/boost/math/special_functions/ellint_rd.hpp index 2a79e54ca2..f2a33adc46 100644 --- a/include/boost/math/special_functions/ellint_rd.hpp +++ b/include/boost/math/special_functions/ellint_rd.hpp @@ -1,4 +1,5 @@ // Copyright (c) 2006 Xiaogang Zhang, 2015 John Maddock. +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -16,10 +17,10 @@ #pragma once #endif +#include +#include #include #include -#include -#include #include // Carlson's elliptic integral of the second kind @@ -29,12 +30,11 @@ namespace boost { namespace math { namespace detail{ template -T ellint_rd_imp(T x, T y, T z, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ellint_rd_imp(T x, T y, T z, const Policy& pol) { BOOST_MATH_STD_USING - using std::swap; - static const char* function = "boost::math::ellint_rd<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "boost::math::ellint_rd<%1%>(%1%,%1%,%1%)"; if(x < 0) { @@ -55,9 +55,11 @@ T ellint_rd_imp(T x, T y, T z, const Policy& pol) // // Special cases from http://dlmf.nist.gov/19.20#iv // - using std::swap; + if(x == z) - swap(x, y); + { + BOOST_MATH_GPU_SAFE_SWAP(x, y); + } if(y == z) { if(x == y) @@ -70,19 +72,21 @@ T ellint_rd_imp(T x, T y, T z, const Policy& pol) } else { - if((std::max)(x, y) / (std::min)(x, y) > T(1.3)) + if(BOOST_MATH_GPU_SAFE_MAX(x, y) / BOOST_MATH_GPU_SAFE_MIN(x, y) > T(1.3)) return 3 * (ellint_rc_imp(x, y, pol) - sqrt(x) / y) / (2 * (y - x)); // Otherwise fall through to avoid cancellation in the above (RC(x,y) -> 1/x^0.5 as x -> y) } } if(x == y) { - if((std::max)(x, z) / (std::min)(x, z) > T(1.3)) + if(BOOST_MATH_GPU_SAFE_MAX(x, z) / BOOST_MATH_GPU_SAFE_MIN(x, z) > T(1.3)) return 3 * (ellint_rc_imp(z, x, pol) - 1 / sqrt(z)) / (z - x); // Otherwise fall through to avoid cancellation in the above (RC(x,y) -> 1/x^0.5 as x -> y) } if(y == 0) - swap(x, y); + { + BOOST_MATH_GPU_SAFE_SWAP(x, y); + } if(x == 0) { // @@ -102,7 +106,8 @@ T ellint_rd_imp(T x, T y, T z, const Policy& pol) xn = (xn + yn) / 2; yn = t; sum_pow *= 2; - sum += sum_pow * boost::math::pow<2>(xn - yn); + const auto temp = (xn - 
yn); + sum += sum_pow * temp * temp; } T RF = constants::pi() / (xn + yn); // @@ -128,7 +133,7 @@ T ellint_rd_imp(T x, T y, T z, const Policy& pol) T An = (x + y + 3 * z) / 5; T A0 = An; // This has an extra 1.2 fudge factor which is really only needed when x, y and z are close in magnitude: - T Q = pow(tools::epsilon() / 4, -T(1) / 8) * (std::max)((std::max)(An - x, An - y), An - z) * 1.2f; + T Q = pow(tools::epsilon() / 4, -T(1) / 8) * BOOST_MATH_GPU_SAFE_MAX(BOOST_MATH_GPU_SAFE_MAX(An - x, An - y), An - z) * 1.2f; BOOST_MATH_INSTRUMENT_VARIABLE(Q); T lambda, rx, ry, rz; unsigned k = 0; @@ -177,7 +182,7 @@ T ellint_rd_imp(T x, T y, T z, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_rd(T1 x, T2 y, T3 z, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -190,7 +195,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_rd(T1 x, T2 y, T3 z) { return ellint_rd(x, y, z, policies::policy<>()); diff --git a/include/boost/math/special_functions/ellint_rf.hpp b/include/boost/math/special_functions/ellint_rf.hpp index c781ac0353..eb1c2b6e71 100644 --- a/include/boost/math/special_functions/ellint_rf.hpp +++ b/include/boost/math/special_functions/ellint_rf.hpp @@ -1,4 +1,5 @@ // Copyright (c) 2006 Xiaogang Zhang, 2015 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -17,8 +18,9 @@ #pragma once #endif -#include #include +#include +#include #include #include #include @@ -30,21 +32,20 @@ namespace boost { namespace math { namespace detail{ template - T ellint_rf_imp(T x, T y, T z, const Policy& pol) + BOOST_MATH_GPU_ENABLED T ellint_rf_imp(T x, T y, T z, const Policy& pol) { BOOST_MATH_STD_USING using namespace boost::math; - using std::swap; - static const char* function = "boost::math::ellint_rf<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "boost::math::ellint_rf<%1%>(%1%,%1%,%1%)"; if(x < 0 || y < 0 || z < 0) { - return policies::raise_domain_error(function, "domain error, all arguments must be non-negative, only sensible result is %1%.", std::numeric_limits::quiet_NaN(), pol); + return policies::raise_domain_error(function, "domain error, all arguments must be non-negative, only sensible result is %1%.", boost::math::numeric_limits::quiet_NaN(), pol); } if(x + y == 0 || y + z == 0 || z + x == 0) { - return policies::raise_domain_error(function, "domain error, at most one argument can be zero, only sensible result is %1%.", std::numeric_limits::quiet_NaN(), pol); + return policies::raise_domain_error(function, "domain error, at most one argument can be zero, only sensible result is %1%.", boost::math::numeric_limits::quiet_NaN(), pol); } // // Special cases from http://dlmf.nist.gov/19.20#i @@ -80,9 +81,9 @@ namespace boost { namespace math { namespace detail{ return ellint_rc_imp(x, y, pol); } if(x == 0) - swap(x, z); + BOOST_MATH_GPU_SAFE_SWAP(x, z); else if(y == 0) - swap(y, z); + BOOST_MATH_GPU_SAFE_SWAP(y, z); if(z == 0) { // @@ -105,7 +106,7 @@ namespace boost { namespace math { namespace detail{ T zn = z; T An = (x + y + z) / 3; T A0 = An; - T Q = pow(3 * boost::math::tools::epsilon(), T(-1) / 8) * (std::max)((std::max)(fabs(An - xn), 
fabs(An - yn)), fabs(An - zn)); + T Q = pow(3 * boost::math::tools::epsilon(), T(-1) / 8) * BOOST_MATH_GPU_SAFE_MAX(BOOST_MATH_GPU_SAFE_MAX(fabs(An - xn), fabs(An - yn)), fabs(An - zn)); T fn = 1; @@ -143,7 +144,7 @@ namespace boost { namespace math { namespace detail{ } // namespace detail template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_rf(T1 x, T2 y, T3 z, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -156,7 +157,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_rf(T1 x, T2 y, T3 z) { return ellint_rf(x, y, z, policies::policy<>()); diff --git a/include/boost/math/special_functions/ellint_rg.hpp b/include/boost/math/special_functions/ellint_rg.hpp index 051c104bca..8a7f706ac0 100644 --- a/include/boost/math/special_functions/ellint_rg.hpp +++ b/include/boost/math/special_functions/ellint_rg.hpp @@ -10,8 +10,8 @@ #pragma once #endif -#include #include +#include #include #include #include @@ -21,27 +21,26 @@ namespace boost { namespace math { namespace detail{ template - T ellint_rg_imp(T x, T y, T z, const Policy& pol) + BOOST_MATH_GPU_ENABLED T ellint_rg_imp(T x, T y, T z, const Policy& pol) { BOOST_MATH_STD_USING - static const char* function = "boost::math::ellint_rf<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "boost::math::ellint_rf<%1%>(%1%,%1%,%1%)"; if(x < 0 || y < 0 || z < 0) { - return policies::raise_domain_error(function, "domain error, all arguments must be non-negative, only sensible result is %1%.", std::numeric_limits::quiet_NaN(), pol); + return policies::raise_domain_error(function, "domain error, all arguments must be non-negative, only sensible result is %1%.", boost::math::numeric_limits::quiet_NaN(), pol); } // // Function is symmetric in x, y and z, but we require // (x - z)(y - z) >= 0 to avoid cancellation error in the result // which implies (for example) x >= z >= y // - using std::swap; if(x < y) - swap(x, y); + BOOST_MATH_GPU_SAFE_SWAP(x, y); if(x < z) - swap(x, z); + BOOST_MATH_GPU_SAFE_SWAP(x, z); if(y > z) - swap(y, z); + BOOST_MATH_GPU_SAFE_SWAP(y, z); BOOST_MATH_ASSERT(x >= z); BOOST_MATH_ASSERT(z >= y); @@ -64,7 +63,7 @@ namespace boost { namespace math { namespace detail{ else { // x = z, y != 0 - swap(x, y); + BOOST_MATH_GPU_SAFE_SWAP(x, y); return (x == 0) ? 
T(sqrt(z) / 2) : T((z * ellint_rc_imp(x, z, pol) + sqrt(x)) / 2); } } @@ -75,7 +74,7 @@ namespace boost { namespace math { namespace detail{ } else if(y == 0) { - swap(y, z); + BOOST_MATH_GPU_SAFE_SWAP(y, z); // // Special handling for common case, from // Numerical Computation of Real or Complex Elliptic Integrals, eq.46 @@ -106,7 +105,7 @@ namespace boost { namespace math { namespace detail{ } // namespace detail template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_rg(T1 x, T2 y, T3 z, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -119,7 +118,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_rg(T1 x, T2 y, T3 z) { return ellint_rg(x, y, z, policies::policy<>()); diff --git a/include/boost/math/special_functions/ellint_rj.hpp b/include/boost/math/special_functions/ellint_rj.hpp index f19eac2843..76e1a14eb4 100644 --- a/include/boost/math/special_functions/ellint_rj.hpp +++ b/include/boost/math/special_functions/ellint_rj.hpp @@ -1,4 +1,5 @@ // Copyright (c) 2006 Xiaogang Zhang, 2015 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -18,8 +19,9 @@ #pragma once #endif -#include #include +#include +#include #include #include #include @@ -32,7 +34,7 @@ namespace boost { namespace math { namespace detail{ template -T ellint_rc1p_imp(T y, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ellint_rc1p_imp(T y, const Policy& pol) { using namespace boost::math; // Calculate RC(1, 1 + x) @@ -70,11 +72,11 @@ T ellint_rc1p_imp(T y, const Policy& pol) } template -T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ellint_rj_imp_final(T x, T y, T z, T p, const Policy& pol) { BOOST_MATH_STD_USING - static const char* function = "boost::math::ellint_rj<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "boost::math::ellint_rj<%1%>(%1%,%1%,%1%)"; if(x < 0) { @@ -94,37 +96,7 @@ T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) } if(x + y == 0 || y + z == 0 || z + x == 0) { - return policies::raise_domain_error(function, "At most one argument can be zero, only possible result is %1%.", std::numeric_limits::quiet_NaN(), pol); - } - - // for p < 0, the integral is singular, return Cauchy principal value - if(p < 0) - { - // - // We must ensure that x < y < z. 
- // Since the integral is symmetrical in x, y and z - // we can just permute the values: - // - if(x > y) - std::swap(x, y); - if(y > z) - std::swap(y, z); - if(x > y) - std::swap(x, y); - - BOOST_MATH_ASSERT(x <= y); - BOOST_MATH_ASSERT(y <= z); - - T q = -p; - p = (z * (x + y + q) - x * y) / (z + q); - - BOOST_MATH_ASSERT(p >= 0); - - T value = (p - z) * ellint_rj_imp(x, y, z, p, pol); - value -= 3 * ellint_rf_imp(x, y, z, pol); - value += 3 * sqrt((x * y * z) / (x * y + p * q)) * ellint_rc_imp(T(x * y + p * q), T(p * q), pol); - value /= (z + q); - return value; + return policies::raise_domain_error(function, "At most one argument can be zero, only possible result is %1%.", boost::math::numeric_limits::quiet_NaN(), pol); } // @@ -148,13 +120,12 @@ T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) else { // x = y only, permute so y = z: - using std::swap; - swap(x, z); + BOOST_MATH_GPU_SAFE_SWAP(x, z); if(y == p) { return ellint_rd_imp(x, y, y, pol); } - else if((std::max)(y, p) / (std::min)(y, p) > T(1.2)) + else if(BOOST_MATH_GPU_SAFE_MAX(y, p) / BOOST_MATH_GPU_SAFE_MIN(y, p) > T(1.2)) { return 3 * (ellint_rc_imp(x, y, pol) - ellint_rc_imp(x, p, pol)) / (p - y); } @@ -168,7 +139,7 @@ T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) // y = z = p: return ellint_rd_imp(x, y, y, pol); } - else if((std::max)(y, p) / (std::min)(y, p) > T(1.2)) + else if(BOOST_MATH_GPU_SAFE_MAX(y, p) / BOOST_MATH_GPU_SAFE_MIN(y, p) > T(1.2)) { // y = z: return 3 * (ellint_rc_imp(x, y, pol) - ellint_rc_imp(x, p, pol)) / (p - y); @@ -187,7 +158,7 @@ T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) T An = (x + y + z + 2 * p) / 5; T A0 = An; T delta = (p - x) * (p - y) * (p - z); - T Q = pow(tools::epsilon() / 5, -T(1) / 8) * (std::max)((std::max)(fabs(An - x), fabs(An - y)), (std::max)(fabs(An - z), fabs(An - p))); + T Q = pow(tools::epsilon() / 5, -T(1) / 8) * BOOST_MATH_GPU_SAFE_MAX(BOOST_MATH_GPU_SAFE_MAX(fabs(An - x), fabs(An - y)), BOOST_MATH_GPU_SAFE_MAX(fabs(An - z), fabs(An - p))); unsigned n; T lambda; @@ -260,10 +231,71 @@ T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) return result; } +template +BOOST_MATH_GPU_ENABLED T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) +{ + BOOST_MATH_STD_USING + + constexpr auto function = "boost::math::ellint_rj<%1%>(%1%,%1%,%1%)"; + + if(x < 0) + { + return policies::raise_domain_error(function, "Argument x must be non-negative, but got x = %1%", x, pol); + } + if(y < 0) + { + return policies::raise_domain_error(function, "Argument y must be non-negative, but got y = %1%", y, pol); + } + if(z < 0) + { + return policies::raise_domain_error(function, "Argument z must be non-negative, but got z = %1%", z, pol); + } + if(p == 0) + { + return policies::raise_domain_error(function, "Argument p must not be zero, but got p = %1%", p, pol); + } + if(x + y == 0 || y + z == 0 || z + x == 0) + { + return policies::raise_domain_error(function, "At most one argument can be zero, only possible result is %1%.", boost::math::numeric_limits::quiet_NaN(), pol); + } + + // for p < 0, the integral is singular, return Cauchy principal value + if(p < 0) + { + // + // We must ensure that x < y < z. 
+ // Since the integral is symmetrical in x, y and z + // we can just permute the values: + // + if(x > y) + BOOST_MATH_GPU_SAFE_SWAP(x, y); + if(y > z) + BOOST_MATH_GPU_SAFE_SWAP(y, z); + if(x > y) + BOOST_MATH_GPU_SAFE_SWAP(x, y); + + BOOST_MATH_ASSERT(x <= y); + BOOST_MATH_ASSERT(y <= z); + + T q = -p; + p = (z * (x + y + q) - x * y) / (z + q); + + BOOST_MATH_ASSERT(p >= 0); + + T value = (p - z) * ellint_rj_imp_final(x, y, z, p, pol); + value -= 3 * ellint_rf_imp(x, y, z, pol); + value += 3 * sqrt((x * y * z) / (x * y + p * q)) * ellint_rc_imp(T(x * y + p * q), T(p * q), pol); + value /= (z + q); + return value; + } + + return ellint_rj_imp_final(x, y, z, p, pol); +} + } // namespace detail template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_rj(T1 x, T2 y, T3 z, T4 p, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -278,7 +310,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_rj(T1 x, T2 y, T3 z, T4 p) { return ellint_rj(x, y, z, p, policies::policy<>()); diff --git a/include/boost/math/special_functions/erf.hpp b/include/boost/math/special_functions/erf.hpp index 57ff605299..9f0da9282f 100644 --- a/include/boost/math/special_functions/erf.hpp +++ b/include/boost/math/special_functions/erf.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,8 +11,11 @@ #pragma once #endif -#include #include + +#ifndef BOOST_MATH_HAS_NVRTC + +#include #include #include #include @@ -39,7 +43,7 @@ template struct erf_asympt_series_t { // LCOV_EXCL_START multiprecision case only, excluded from coverage analysis - erf_asympt_series_t(T z) : xx(2 * -z * z), tk(1) + BOOST_MATH_GPU_ENABLED erf_asympt_series_t(T z) : xx(2 * -z * z), tk(1) { BOOST_MATH_STD_USING result = -exp(-z * z) / sqrt(boost::math::constants::pi()); @@ -48,7 +52,7 @@ struct erf_asympt_series_t typedef T result_type; - T operator()() + BOOST_MATH_GPU_ENABLED T operator()() { BOOST_MATH_STD_USING T r = result; @@ -68,33 +72,33 @@ struct erf_asympt_series_t // How large z has to be in order to ensure that the series converges: // template -inline float erf_asymptotic_limit_N(const T&) +BOOST_MATH_GPU_ENABLED inline float erf_asymptotic_limit_N(const T&) { return (std::numeric_limits::max)(); } -inline float erf_asymptotic_limit_N(const std::integral_constant&) +BOOST_MATH_GPU_ENABLED inline float erf_asymptotic_limit_N(const std::integral_constant&) { return 2.8F; } -inline float erf_asymptotic_limit_N(const std::integral_constant&) +BOOST_MATH_GPU_ENABLED inline float erf_asymptotic_limit_N(const std::integral_constant&) { return 4.3F; } -inline float erf_asymptotic_limit_N(const std::integral_constant&) +BOOST_MATH_GPU_ENABLED inline float erf_asymptotic_limit_N(const std::integral_constant&) { return 4.8F; } -inline float erf_asymptotic_limit_N(const std::integral_constant&) +BOOST_MATH_GPU_ENABLED inline float erf_asymptotic_limit_N(const std::integral_constant&) { return 6.5F; } -inline float erf_asymptotic_limit_N(const std::integral_constant&) +BOOST_MATH_GPU_ENABLED inline float erf_asymptotic_limit_N(const std::integral_constant&) { return 6.8F; } template -inline T 
erf_asymptotic_limit() +BOOST_MATH_GPU_ENABLED inline T erf_asymptotic_limit() { typedef typename policies::precision::type precision_type; typedef std::integral_constant -T erf_imp(T z, bool invert, const Policy& pol, const std::integral_constant& t) +BOOST_MATH_GPU_ENABLED T erf_imp(T z, bool invert, const Policy& pol, const std::integral_constant&) { BOOST_MATH_STD_USING @@ -207,14 +211,30 @@ T erf_imp(T z, bool invert, const Policy& pol, const std::integral_constant(%1%)", "Expected a finite argument but got %1%", z, pol); + int prefix_multiplier = 1; + int prefix_adder = 0; + if(z < 0) { + // Recursion is logically simpler here, but confuses static analyzers that need to be + // able to calculate the maximum program stack size at compile time (i.e. CUDA). + z = -z; if(!invert) - return -erf_imp(T(-z), invert, pol, t); + { + prefix_multiplier = -1; + // return -erf_imp(T(-z), invert, pol, t); + } else if(z < T(-0.5)) - return 2 - erf_imp(T(-z), invert, pol, t); + { + prefix_adder = 2; + // return 2 - erf_imp(T(-z), invert, pol, t); + } else - return 1 + erf_imp(T(-z), false, pol, t); + { + invert = false; + prefix_adder = 1; + // return 1 + erf_imp(T(-z), false, pol, t); + } } T result; @@ -237,7 +257,7 @@ T erf_imp(T z, bool invert, const Policy& pol, const std::integral_constant(z * 1.125f + z * c); } } @@ -248,15 +268,15 @@ T erf_imp(T z, bool invert, const Policy& pol, const std::integral_constantT erf_imp(T z, bool invert, const Lanczos& l, const std::integral_constant& t) @@ -1175,7 +1196,7 @@ T erf_imp(T z, bool invert, const Policy& pol, const std::integral_constant -inline typename tools::promote_args::type erf(T z, const Policy& /* pol */) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type erf(T z, const Policy& /* pol */) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -1208,7 +1229,7 @@ inline typename tools::promote_args::type erf(T z, const Policy& /* pol */) } template -inline typename tools::promote_args::type erfc(T z, const Policy& /* pol */) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type erfc(T z, const Policy& /* pol */) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -1241,13 +1262,13 @@ inline typename tools::promote_args::type erfc(T z, const Policy& /* pol */) } template -inline typename tools::promote_args::type erf(T z) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type erf(T z) { return boost::math::erf(z, policies::policy<>()); } template -inline typename tools::promote_args::type erfc(T z) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type erfc(T z) { return boost::math::erfc(z, policies::policy<>()); } @@ -1255,6 +1276,64 @@ inline typename tools::promote_args::type erfc(T z) } // namespace math } // namespace boost +#else // Special handling for NVRTC platform + +namespace boost { +namespace math { + +template +BOOST_MATH_GPU_ENABLED auto erf(T x) +{ + return ::erf(x); +} + +template <> +BOOST_MATH_GPU_ENABLED auto erf(float x) +{ + return ::erff(x); +} + +template +BOOST_MATH_GPU_ENABLED auto erf(T x, const Policy&) +{ + return ::erf(x); +} + +template +BOOST_MATH_GPU_ENABLED auto erf(float x, const Policy&) +{ + return ::erff(x); +} + +template +BOOST_MATH_GPU_ENABLED auto erfc(T x) +{ + return ::erfc(x); +} + +template <> +BOOST_MATH_GPU_ENABLED auto erfc(float x) +{ + return ::erfcf(x); +} + +template +BOOST_MATH_GPU_ENABLED auto erfc(T x, const
Policy&) +{ + return ::erfc(x); +} + +template +BOOST_MATH_GPU_ENABLED auto erfc(float x, const Policy&) +{ + return ::erfcf(x); +} + +} // namespace math +} // namespace boost + +#endif // BOOST_MATH_HAS_NVRTC + #include #endif // BOOST_MATH_SPECIAL_ERF_HPP diff --git a/include/boost/math/special_functions/expint.hpp b/include/boost/math/special_functions/expint.hpp index 1475a9a88b..09e97bd4fc 100644 --- a/include/boost/math/special_functions/expint.hpp +++ b/include/boost/math/special_functions/expint.hpp @@ -1,4 +1,5 @@ // Copyright John Maddock 2007. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -12,6 +13,10 @@ #pragma warning(disable:4702) // Unreachable code (release mode only warning) #endif +#include +#include +#include +#include #include #include #include @@ -20,7 +25,6 @@ #include #include #include -#include #if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128) // @@ -35,13 +39,13 @@ namespace boost{ namespace math{ template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type expint(unsigned n, T z, const Policy& /*pol*/); namespace detail{ template -inline T expint_1_rational(const T& z, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED inline T expint_1_rational(const T& z, const boost::math::integral_constant&) { // this function is never actually called BOOST_MATH_ASSERT(0); @@ -49,7 +53,7 @@ inline T expint_1_rational(const T& z, const std::integral_constant&) } template -T expint_1_rational(const T& z, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T expint_1_rational(const T& z, const boost::math::integral_constant&) { BOOST_MATH_STD_USING T result; @@ -123,7 +127,7 @@ T expint_1_rational(const T& z, const std::integral_constant&) } template -T expint_1_rational(const T& z, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T expint_1_rational(const T& z, const boost::math::integral_constant&) { BOOST_MATH_STD_USING T result; @@ -204,7 +208,7 @@ T expint_1_rational(const T& z, const std::integral_constant&) } template -T expint_1_rational(const T& z, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T expint_1_rational(const T& z, const boost::math::integral_constant&) { BOOST_MATH_STD_USING T result; @@ -351,14 +355,15 @@ T expint_1_rational(const T& z, const std::integral_constant&) return result; } + template struct expint_fraction { - typedef std::pair result_type; - expint_fraction(unsigned n_, T z_) : b(n_ + z_), i(-1), n(n_){} - std::pair operator()() + typedef boost::math::pair result_type; + BOOST_MATH_GPU_ENABLED expint_fraction(unsigned n_, T z_) : b(n_ + z_), i(-1), n(n_){} + BOOST_MATH_GPU_ENABLED boost::math::pair operator()() { - std::pair result = std::make_pair(-static_cast((i+1) * (n+i)), b); + boost::math::pair result = boost::math::make_pair(-static_cast((i+1) * (n+i)), b); b += 2; ++i; return result; @@ -370,11 +375,11 @@ struct expint_fraction }; template -inline T expint_as_fraction(unsigned n, T z, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T expint_as_fraction(unsigned n, T z, const Policy& pol) { BOOST_MATH_STD_USING BOOST_MATH_INSTRUMENT_VARIABLE(z) - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_series_iterations(); expint_fraction f(n, z); T result = tools::continued_fraction_b( f, @@ -392,9 +397,9 @@ 
template struct expint_series { typedef T result_type; - expint_series(unsigned k_, T z_, T x_k_, T denom_, T fact_) + BOOST_MATH_GPU_ENABLED expint_series(unsigned k_, T z_, T x_k_, T denom_, T fact_) : k(k_), z(z_), x_k(x_k_), denom(denom_), fact(fact_){} - T operator()() + BOOST_MATH_GPU_ENABLED T operator()() { x_k *= -z; denom += 1; @@ -410,10 +415,10 @@ struct expint_series }; template -inline T expint_as_series(unsigned n, T z, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T expint_as_series(unsigned n, T z, const Policy& pol) { BOOST_MATH_STD_USING - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_series_iterations(); BOOST_MATH_INSTRUMENT_VARIABLE(z) @@ -443,10 +448,10 @@ inline T expint_as_series(unsigned n, T z, const Policy& pol) } template -T expint_imp(unsigned n, T z, const Policy& pol, const Tag& tag) +BOOST_MATH_GPU_ENABLED T expint_imp(unsigned n, T z, const Policy& pol, const Tag& tag) { BOOST_MATH_STD_USING - static const char* function = "boost::math::expint<%1%>(unsigned, %1%)"; + constexpr auto function = "boost::math::expint<%1%>(unsigned, %1%)"; if(z < 0) return policies::raise_domain_error(function, "Function requires z >= 0 but got %1%.", z, pol); if(z == 0) @@ -468,15 +473,21 @@ T expint_imp(unsigned n, T z, const Policy& pol, const Tag& tag) # pragma warning(disable:4127) // conditional expression is constant #endif if(n == 0) + { result = exp(-z) / z; + } else if((n == 1) && (Tag::value)) { result = expint_1_rational(z, tag); } else if(f) + { result = expint_as_series(n, z, pol); + } else + { result = expint_as_fraction(n, z, pol); + } #ifdef _MSC_VER # pragma warning(pop) #endif @@ -488,8 +499,8 @@ template struct expint_i_series { typedef T result_type; - expint_i_series(T z_) : k(0), z_k(1), z(z_){} - T operator()() + BOOST_MATH_GPU_ENABLED expint_i_series(T z_) : k(0), z_k(1), z(z_){} + BOOST_MATH_GPU_ENABLED T operator()() { z_k *= z / ++k; return z_k / k; @@ -501,22 +512,22 @@ struct expint_i_series }; template -T expint_i_as_series(T z, const Policy& pol) +BOOST_MATH_GPU_ENABLED T expint_i_as_series(T z, const Policy& pol) { BOOST_MATH_STD_USING T result = log(z); // (log(z) - log(1 / z)) / 2; result += constants::euler(); expint_i_series s(z); - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_series_iterations(); result = tools::sum_series(s, policies::get_epsilon(), max_iter, result); policies::check_series_iterations("boost::math::expint_i_series<%1%>(%1%)", max_iter, pol); return result; } template -T expint_i_imp(T z, const Policy& pol, const Tag& tag) +BOOST_MATH_GPU_ENABLED T expint_i_imp(T z, const Policy& pol, const Tag& tag) { - static const char* function = "boost::math::expint<%1%>(%1%)"; + constexpr auto function = "boost::math::expint<%1%>(%1%)"; if(z < 0) return -expint_imp(1, T(-z), pol, tag); if(z == 0) @@ -525,10 +536,10 @@ T expint_i_imp(T z, const Policy& pol, const Tag& tag) } template -T expint_i_imp(T z, const Policy& pol, const std::integral_constant& tag) +BOOST_MATH_GPU_ENABLED T expint_i_imp(T z, const Policy& pol, const boost::math::integral_constant& tag) { BOOST_MATH_STD_USING - static const char* function = "boost::math::expint<%1%>(%1%)"; + constexpr auto function = "boost::math::expint<%1%>(%1%)"; if(z < 0) return -expint_imp(1, T(-z), pol, tag); if(z == 0) @@ -541,7 +552,7 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta // Maximum Deviation Found: 
2.852e-18 // Expected Error Term: 2.852e-18 // Max Error found at double precision = Poly: 2.636335e-16 Cheb: 4.187027e-16 - static const T P[10] = { + BOOST_MATH_STATIC const T P[10] = { BOOST_MATH_BIG_CONSTANT(T, 53, 2.98677224343598593013), BOOST_MATH_BIG_CONSTANT(T, 53, 0.356343618769377415068), BOOST_MATH_BIG_CONSTANT(T, 53, 0.780836076283730801839), @@ -553,7 +564,7 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta BOOST_MATH_BIG_CONSTANT(T, 53, 0.798296365679269702435e-5), BOOST_MATH_BIG_CONSTANT(T, 53, 0.2777056254402008721e-6) }; - static const T Q[8] = { + BOOST_MATH_STATIC const T Q[8] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, -1.17090412365413911947), BOOST_MATH_BIG_CONSTANT(T, 53, 0.62215109846016746276), @@ -564,11 +575,11 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta BOOST_MATH_BIG_CONSTANT(T, 53, -0.138972589601781706598e-4) }; - static const T c1 = BOOST_MATH_BIG_CONSTANT(T, 53, 1677624236387711.0); - static const T c2 = BOOST_MATH_BIG_CONSTANT(T, 53, 4503599627370496.0); - static const T r1 = static_cast(c1 / c2); - static const T r2 = BOOST_MATH_BIG_CONSTANT(T, 53, 0.131401834143860282009280387409357165515556574352422001206362e-16); - static const T r = static_cast(BOOST_MATH_BIG_CONSTANT(T, 53, 0.372507410781366634461991866580119133535689497771654051555657435242200120636201854384926049951548942392)); + BOOST_MATH_STATIC_LOCAL_VARIABLE const T c1 = BOOST_MATH_BIG_CONSTANT(T, 53, 1677624236387711.0); + BOOST_MATH_STATIC_LOCAL_VARIABLE const T c2 = BOOST_MATH_BIG_CONSTANT(T, 53, 4503599627370496.0); + BOOST_MATH_STATIC_LOCAL_VARIABLE const T r1 = static_cast(c1 / c2); + BOOST_MATH_STATIC_LOCAL_VARIABLE const T r2 = BOOST_MATH_BIG_CONSTANT(T, 53, 0.131401834143860282009280387409357165515556574352422001206362e-16); + BOOST_MATH_STATIC_LOCAL_VARIABLE const T r = static_cast(BOOST_MATH_BIG_CONSTANT(T, 53, 0.372507410781366634461991866580119133535689497771654051555657435242200120636201854384926049951548942392)); T t = (z / 3) - 1; result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); @@ -588,8 +599,8 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta // Maximum Deviation Found: 6.546e-17 // Expected Error Term: 6.546e-17 // Max Error found at double precision = Poly: 6.890169e-17 Cheb: 6.772128e-17 - static const T Y = 1.158985137939453125F; - static const T P[8] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const T Y = 1.158985137939453125F; + BOOST_MATH_STATIC const T P[8] = { BOOST_MATH_BIG_CONSTANT(T, 53, 0.00139324086199402804173), BOOST_MATH_BIG_CONSTANT(T, 53, -0.0349921221823888744966), BOOST_MATH_BIG_CONSTANT(T, 53, -0.0264095520754134848538), @@ -599,7 +610,7 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta BOOST_MATH_BIG_CONSTANT(T, 53, -0.554086272024881826253e-4), BOOST_MATH_BIG_CONSTANT(T, 53, -0.396487648924804510056e-5) }; - static const T Q[8] = { + BOOST_MATH_STATIC const T Q[8] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, 0.744625566823272107711), BOOST_MATH_BIG_CONSTANT(T, 53, 0.329061095011767059236), @@ -621,8 +632,8 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta // Expected Error Term: -1.842e-17 // Max Error found at double precision = Poly: 4.375868e-17 Cheb: 5.860967e-17 - static const T Y = 1.0869731903076171875F; - static const T P[9] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const T Y = 1.0869731903076171875F; + BOOST_MATH_STATIC const T P[9] = { 
BOOST_MATH_BIG_CONSTANT(T, 53, -0.00893891094356945667451), BOOST_MATH_BIG_CONSTANT(T, 53, -0.0484607730127134045806), BOOST_MATH_BIG_CONSTANT(T, 53, -0.0652810444222236895772), @@ -633,7 +644,7 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta BOOST_MATH_BIG_CONSTANT(T, 53, -0.000209750022660200888349), BOOST_MATH_BIG_CONSTANT(T, 53, -0.138652200349182596186e-4) }; - static const T Q[9] = { + BOOST_MATH_STATIC const T Q[9] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, 1.97017214039061194971), BOOST_MATH_BIG_CONSTANT(T, 53, 1.86232465043073157508), @@ -657,8 +668,8 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta // Max Error found at double precision = Poly: 1.441088e-16 Cheb: 1.864792e-16 - static const T Y = 1.03937530517578125F; - static const T P[9] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const T Y = 1.03937530517578125F; + BOOST_MATH_STATIC const T P[9] = { BOOST_MATH_BIG_CONSTANT(T, 53, -0.00356165148914447597995), BOOST_MATH_BIG_CONSTANT(T, 53, -0.0229930320357982333406), BOOST_MATH_BIG_CONSTANT(T, 53, -0.0449814350482277917716), @@ -669,7 +680,7 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta BOOST_MATH_BIG_CONSTANT(T, 53, -0.000192178045857733706044), BOOST_MATH_BIG_CONSTANT(T, 53, -0.113161784705911400295e-9) }; - static const T Q[9] = { + BOOST_MATH_STATIC const T Q[9] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, 2.84354408840148561131), BOOST_MATH_BIG_CONSTANT(T, 53, 3.6599610090072393012), @@ -688,9 +699,9 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta else { // Max Error found at double precision = 3.381886e-17 - static const T exp40 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 53, 2.35385266837019985407899910749034804508871617254555467236651e17)); - static const T Y= 1.013065338134765625F; - static const T P[6] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const T exp40 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 53, 2.35385266837019985407899910749034804508871617254555467236651e17)); + BOOST_MATH_STATIC_LOCAL_VARIABLE const T Y= 1.013065338134765625F; + BOOST_MATH_STATIC const T P[6] = { BOOST_MATH_BIG_CONSTANT(T, 53, -0.0130653381347656243849), BOOST_MATH_BIG_CONSTANT(T, 53, 0.19029710559486576682), BOOST_MATH_BIG_CONSTANT(T, 53, 94.7365094537197236011), @@ -698,7 +709,7 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta BOOST_MATH_BIG_CONSTANT(T, 53, 18932.0850014925993025), BOOST_MATH_BIG_CONSTANT(T, 53, -38703.1431362056714134) }; - static const T Q[7] = { + BOOST_MATH_STATIC const T Q[7] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, 61.9733592849439884145), BOOST_MATH_BIG_CONSTANT(T, 53, -2354.56211323420194283), @@ -739,10 +750,10 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta } template -T expint_i_imp(T z, const Policy& pol, const std::integral_constant& tag) +BOOST_MATH_GPU_ENABLED T expint_i_imp(T z, const Policy& pol, const boost::math::integral_constant& tag) { BOOST_MATH_STD_USING - static const char* function = "boost::math::expint<%1%>(%1%)"; + constexpr auto function = "boost::math::expint<%1%>(%1%)"; if(z < 0) return -expint_imp(1, T(-z), pol, tag); if(z == 0) @@ -976,7 +987,7 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta } template -void expint_i_imp_113a(T& result, const T& z, const Policy& pol) +BOOST_MATH_GPU_ENABLED void expint_i_imp_113a(T& result, const T& z, const Policy& pol) { BOOST_MATH_STD_USING // 
   // Maximum Deviation Found:  1.230e-36
@@ -1044,7 +1055,7 @@ void expint_i_imp_113a(T& result, const T& z, const Policy& pol)
}

template <class T>
-void expint_i_113b(T& result, const T& z)
+BOOST_MATH_GPU_ENABLED void expint_i_113b(T& result, const T& z)
{
   BOOST_MATH_STD_USING
   // Maximum Deviation Found:  7.779e-36
@@ -1094,7 +1105,7 @@ void expint_i_113b(T& result, const T& z)
}

template <class T>
-void expint_i_113c(T& result, const T& z)
+BOOST_MATH_GPU_ENABLED void expint_i_113c(T& result, const T& z)
{
   BOOST_MATH_STD_USING
   // Maximum Deviation Found:  1.082e-34
@@ -1147,7 +1158,7 @@ void expint_i_113c(T& result, const T& z)
}

template <class T>
-void expint_i_113d(T& result, const T& z)
+BOOST_MATH_GPU_ENABLED void expint_i_113d(T& result, const T& z)
{
   BOOST_MATH_STD_USING
   // Maximum Deviation Found:  3.163e-35
@@ -1198,7 +1209,7 @@ void expint_i_113d(T& result, const T& z)
}

template <class T>
-void expint_i_113e(T& result, const T& z)
+BOOST_MATH_GPU_ENABLED void expint_i_113e(T& result, const T& z)
{
   BOOST_MATH_STD_USING
   // Maximum Deviation Found:  7.972e-36
@@ -1252,7 +1263,7 @@ void expint_i_113e(T& result, const T& z)
}

template <class T>
-void expint_i_113f(T& result, const T& z)
+BOOST_MATH_GPU_ENABLED void expint_i_113f(T& result, const T& z)
{
   BOOST_MATH_STD_USING
   // Maximum Deviation Found:  4.469e-36
@@ -1299,7 +1310,7 @@ void expint_i_113f(T& result, const T& z)
}

template <class T>
-void expint_i_113g(T& result, const T& z)
+BOOST_MATH_GPU_ENABLED void expint_i_113g(T& result, const T& z)
{
   BOOST_MATH_STD_USING
   // Maximum Deviation Found:  5.588e-35
@@ -1344,7 +1355,7 @@ void expint_i_113g(T& result, const T& z)
}

template <class T>
-void expint_i_113h(T& result, const T& z)
+BOOST_MATH_GPU_ENABLED void expint_i_113h(T& result, const T& z)
{
   BOOST_MATH_STD_USING
   // Maximum Deviation Found:  4.448e-36
@@ -1383,10 +1394,10 @@ void expint_i_113h(T& result, const T& z)
}

template <class T, class Policy>
-T expint_i_imp(T z, const Policy& pol, const std::integral_constant<int, 113>& tag)
+BOOST_MATH_GPU_ENABLED T expint_i_imp(T z, const Policy& pol, const boost::math::integral_constant<int, 113>& tag)
{
   BOOST_MATH_STD_USING
-   static const char* function = "boost::math::expint<%1%>(%1%)";
+   constexpr auto function = "boost::math::expint<%1%>(%1%)";
   if(z < 0)
      return -expint_imp(1, T(-z), pol, tag);
   if(z == 0)
@@ -1491,12 +1502,12 @@ struct expint_i_initializer
{
   struct init
   {
-      init()
+      BOOST_MATH_GPU_ENABLED init()
      {
         do_init(tag());
      }
-      static void do_init(const std::integral_constant<int, 0>&){}
-      static void do_init(const std::integral_constant<int, 53>&)
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 0>&){}
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 53>&)
      {
         boost::math::expint(T(5), Policy());
         boost::math::expint(T(7), Policy());
@@ -1504,7 +1515,7 @@ struct expint_i_initializer
         boost::math::expint(T(38), Policy());
         boost::math::expint(T(45), Policy());
      }
-      static void do_init(const std::integral_constant<int, 64>&)
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 64>&)
      {
         boost::math::expint(T(5), Policy());
         boost::math::expint(T(7), Policy());
@@ -1512,7 +1523,7 @@ struct expint_i_initializer
         boost::math::expint(T(38), Policy());
         boost::math::expint(T(45), Policy());
      }
-      static void do_init(const std::integral_constant<int, 113>&)
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 113>&)
      {
         boost::math::expint(T(5), Policy());
         boost::math::expint(T(7), Policy());
@@ -1524,12 +1535,14 @@ struct expint_i_initializer
         boost::math::expint(T(200), Policy());
         boost::math::expint(T(220), Policy());
      }
-      void force_instantiate()const{}
+      BOOST_MATH_GPU_ENABLED void force_instantiate()const{}
   };
   static const init initializer;
-   static void force_instantiate()
+   BOOST_MATH_GPU_ENABLED static void force_instantiate()
   {
+      #ifndef BOOST_MATH_HAS_GPU_SUPPORT
      initializer.force_instantiate();
+      #endif
   }
};
@@ -1541,33 +1554,35 @@ struct expint_1_initializer
{
   struct init
   {
-      init()
+      BOOST_MATH_GPU_ENABLED init()
      {
         do_init(tag());
      }
-      static void do_init(const std::integral_constant<int, 0>&){}
-      static void do_init(const std::integral_constant<int, 53>&)
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 0>&){}
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 53>&)
      {
         boost::math::expint(1, T(0.5), Policy());
         boost::math::expint(1, T(2), Policy());
      }
-      static void do_init(const std::integral_constant<int, 64>&)
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 64>&)
      {
         boost::math::expint(1, T(0.5), Policy());
         boost::math::expint(1, T(2), Policy());
      }
-      static void do_init(const std::integral_constant<int, 113>&)
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 113>&)
      {
         boost::math::expint(1, T(0.5), Policy());
         boost::math::expint(1, T(2), Policy());
         boost::math::expint(1, T(6), Policy());
      }
-      void force_instantiate()const{}
+      BOOST_MATH_GPU_ENABLED void force_instantiate()const{}
   };
   static const init initializer;
-   static void force_instantiate()
+   BOOST_MATH_GPU_ENABLED static void force_instantiate()
   {
+      #ifndef BOOST_MATH_HAS_GPU_SUPPORT
      initializer.force_instantiate();
+      #endif
   }
};
@@ -1575,8 +1590,8 @@ template
const typename expint_1_initializer<T, Policy>::init expint_1_initializer<T, Policy>::initializer;

template <class T, class Policy>
-inline typename tools::promote_args<T>::type
-   expint_forwarder(T z, const Policy& /*pol*/, std::true_type const&)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
+   expint_forwarder(T z, const Policy& /*pol*/, boost::math::true_type const&)
{
   typedef typename tools::promote_args<T>::type result_type;
   typedef typename policies::evaluation<result_type, Policy>::type value_type;
@@ -1587,7 +1602,7 @@ inline typename tools::promote_args::type
      policies::promote_double<false>,
      policies::discrete_quantile<>,
      policies::assert_undefined<> >::type forwarding_policy;
-   typedef std::integral_constant<int, ...> tag_type;
+   typedef boost::math::integral_constant<int, ...> tag_type;
...
}

template <class T>
-inline typename tools::promote_args<T>::type
-expint_forwarder(unsigned n, T z, const std::false_type&)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
+expint_forwarder(unsigned n, T z, const boost::math::false_type&)
{
   return boost::math::expint(n, z, policies::policy<>());
}
@@ -1612,7 +1627,7 @@ expint_forwarder(unsigned n, T z, const std::false_type&)
} // namespace detail

template <class T, class Policy>
-inline typename tools::promote_args<T>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
   expint(unsigned n, T z, const Policy& /*pol*/)
{
   typedef typename tools::promote_args<T>::type result_type;
@@ -1624,7 +1639,7 @@ inline typename tools::promote_args::type
      policies::promote_double<false>,
      policies::discrete_quantile<>,
      policies::assert_undefined<> >::type forwarding_policy;
-   typedef std::integral_constant<int, ...> tag_type;
+   typedef boost::math::integral_constant<int, ...> tag_type;
...
}

template <class T, class U>
-inline typename detail::expint_result<T, U>::type
+BOOST_MATH_GPU_ENABLED inline typename detail::expint_result<T, U>::type
   expint(T const z, U const u)
{
   typedef typename policies::is_policy<U>::type tag_type;
@@ -1649,7 +1664,7 @@ inline typename detail::expint_result::type
}

template <class T>
-inline typename tools::promote_args<T>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
   expint(T z)
{
   return expint(z, policies::policy<>());
}
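For orientation, a minimal host-side usage sketch of the two expint overload families that the hunks above decorate with BOOST_MATH_GPU_ENABLED (the API is Boost.Math's public one; the argument values are illustrative):

    // Illustrative only: exercises both expint families touched by this diff.
    // boost::math::expint(z) computes Ei(z); expint(n, z) computes E_n(z).
    #include <boost/math/special_functions/expint.hpp>
    #include <iostream>

    int main()
    {
       double ei = boost::math::expint(1.5);    // one-argument family: Ei(1.5)
       double e1 = boost::math::expint(1, 1.5); // two-argument family: E_1(1.5)
       std::cout << ei << ' ' << e1 << '\n';
    }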
diff --git a/include/boost/math/special_functions/expm1.hpp b/include/boost/math/special_functions/expm1.hpp
index eec6356031..5e61ca20b0 100644
--- a/include/boost/math/special_functions/expm1.hpp
+++ b/include/boost/math/special_functions/expm1.hpp
@@ -1,4 +1,5 @@
//  (C) Copyright John Maddock 2006.
+//  (C) Copyright Matt Borland 2024.
//  Use, modification and distribution are subject to the
//  Boost Software License, Version 1.0. (See accompanying file
//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -10,10 +11,10 @@
#pragma once
#endif

-#include <...>
-#include <...>
-#include <...>
#include <...>
+
+#ifndef BOOST_MATH_HAS_NVRTC
+
#include <...>
#include <...>
@@ -21,6 +22,9 @@
#include <...>
#include <...>
#include <...>
+#include <...>
+#include <...>
+#include <...>

#if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128)
//
@@ -45,10 +49,10 @@ namespace detail
{
   typedef T result_type;

-   expm1_series(T x)
+   BOOST_MATH_GPU_ENABLED expm1_series(T x)
      : k(0), m_x(x), m_term(1) {}

-   T operator()()
+   BOOST_MATH_GPU_ENABLED T operator()()
   {
      ++k;
      m_term *= m_x;
@@ -56,7 +60,7 @@ namespace detail
      return m_term;
   }

-   int count()const
+   BOOST_MATH_GPU_ENABLED int count()const
   {
      return k;
   }
@@ -74,26 +78,28 @@ struct expm1_initializer
{
   struct init
   {
-      init()
+      BOOST_MATH_GPU_ENABLED init()
      {
         do_init(tag());
      }
      template <int b>
-      static void do_init(const std::integral_constant<int, b>&){}
-      static void do_init(const std::integral_constant<int, 53>&)
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, b>&){}
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 53>&)
      {
         expm1(T(0.5));
      }
-      static void do_init(const std::integral_constant<int, 64>&)
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 64>&)
      {
         expm1(T(0.5));
      }
-      void force_instantiate()const{}
+      BOOST_MATH_GPU_ENABLED void force_instantiate()const{}
   };
-   static const init initializer;
-   static void force_instantiate()
+   BOOST_MATH_STATIC const init initializer;
+   BOOST_MATH_GPU_ENABLED static void force_instantiate()
   {
+      #ifndef BOOST_MATH_HAS_GPU_SUPPORT
      initializer.force_instantiate();
+      #endif
   }
};
@@ -106,7 +112,7 @@ const typename expm1_initializer::init expm1_initializer
... |x| > epsilon.
//
template <class T, class Policy>
-T expm1_imp(T x, const std::integral_constant<int, 0>&, const Policy& pol)
+T expm1_imp(T x, const boost::math::integral_constant<int, 0>&, const Policy& pol)
{
   BOOST_MATH_STD_USING
@@ -128,7 +134,7 @@ T expm1_imp(T x, const std::integral_constant&, const Policy& pol)
   if(a < tools::epsilon<T>())
      return x;
   detail::expm1_series<T> s(x);
-   std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
+   boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();

   T result = tools::sum_series(s, policies::get_epsilon<T, Policy>(), max_iter);
@@ -137,7 +143,7 @@ T expm1_imp(T x, const std::integral_constant&, const Policy& pol)
}

template <class T, class P>
-T expm1_imp(T x, const std::integral_constant<int, 53>&, const P& pol)
+BOOST_MATH_GPU_ENABLED T expm1_imp(T x, const boost::math::integral_constant<int, 53>&, const P& pol)
{
   BOOST_MATH_STD_USING
@@ -155,16 +161,16 @@ T expm1_imp(T x, const std::integral_constant&, const P& pol)
   if(a < tools::epsilon<T>())
      return x;

-   static const float Y = 0.10281276702880859e1f;
-   static const T n[] = { static_cast<T>(-0.28127670288085937e-1), static_cast<T>(0.51278186299064534e0), static_cast<T>(-0.6310029069350198e-1), static_cast<T>(0.11638457975729296e-1), static_cast<T>(-0.52143390687521003e-3), static_cast<T>(0.21491399776965688e-4) };
-   static const T d[] = { 1, static_cast<T>(-0.45442309511354755e0), static_cast<T>(0.90850389570911714e-1), static_cast<T>(-0.10088963629815502e-1), static_cast<T>(0.63003407478692265e-3), static_cast<T>(-0.17976570003654402e-4) };
+   BOOST_MATH_STATIC const float Y = 0.10281276702880859e1f;
+   BOOST_MATH_STATIC const T n[] = { static_cast<T>(-0.28127670288085937e-1), static_cast<T>(0.51278186299064534e0), static_cast<T>(-0.6310029069350198e-1), static_cast<T>(0.11638457975729296e-1), static_cast<T>(-0.52143390687521003e-3), static_cast<T>(0.21491399776965688e-4) };
+   BOOST_MATH_STATIC const T d[] = { 1, static_cast<T>(-0.45442309511354755e0), static_cast<T>(0.90850389570911714e-1), static_cast<T>(-0.10088963629815502e-1), static_cast<T>(0.63003407478692265e-3), static_cast<T>(-0.17976570003654402e-4) };

   T result = x * Y + x * tools::evaluate_polynomial(n, x) / tools::evaluate_polynomial(d, x);
   return result;
}

template <class T, class P>
-T expm1_imp(T x, const std::integral_constant<int, 64>&, const P& pol)
+BOOST_MATH_GPU_ENABLED T expm1_imp(T x, const boost::math::integral_constant<int, 64>&, const P& pol)
{
   BOOST_MATH_STD_USING
@@ -182,8 +188,8 @@ T expm1_imp(T x, const std::integral_constant&, const P& pol)
   if(a < tools::epsilon<T>())
      return x;

-   static const float Y = 0.10281276702880859375e1f;
-   static const T n[] = {
+   BOOST_MATH_STATIC const float Y = 0.10281276702880859375e1f;
+   BOOST_MATH_STATIC const T n[] = {
      BOOST_MATH_BIG_CONSTANT(T, 64, -0.281276702880859375e-1),
      BOOST_MATH_BIG_CONSTANT(T, 64, 0.512980290285154286358e0),
      BOOST_MATH_BIG_CONSTANT(T, 64, -0.667758794592881019644e-1),
@@ -192,7 +198,7 @@ T expm1_imp(T x, const std::integral_constant&, const P& pol)
      BOOST_MATH_BIG_CONSTANT(T, 64, 0.447441185192951335042e-4),
      BOOST_MATH_BIG_CONSTANT(T, 64, -0.714539134024984593011e-6)
   };
-   static const T d[] = {
+   BOOST_MATH_STATIC const T d[] = {
      BOOST_MATH_BIG_CONSTANT(T, 64, 1.0),
      BOOST_MATH_BIG_CONSTANT(T, 64, -0.461477618025562520389e0),
      BOOST_MATH_BIG_CONSTANT(T, 64, 0.961237488025708540713e-1),
@@ -207,7 +213,7 @@ T expm1_imp(T x, const std::integral_constant&, const P& pol)
}

template <class T, class P>
-T expm1_imp(T x, const std::integral_constant<int, 113>&, const P& pol)
+BOOST_MATH_GPU_ENABLED T expm1_imp(T x, const boost::math::integral_constant<int, 113>&, const P& pol)
{
   BOOST_MATH_STD_USING
@@ -259,7 +265,7 @@ T expm1_imp(T x, const std::integral_constant&, const P& pol)
}
} // namespace detail

template <class T, class Policy>
-inline typename tools::promote_args<T>::type expm1(T x, const Policy& /* pol */)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type expm1(T x, const Policy& /* pol */)
{
   typedef typename tools::promote_args<T>::type result_type;
   typedef typename policies::evaluation<result_type, Policy>::type value_type;
@@ -271,7 +277,7 @@ inline typename tools::promote_args::type expm1(T x, const Policy& /* pol */)
      policies::discrete_quantile<>,
      policies::assert_undefined<> >::type forwarding_policy;

-   typedef std::integral_constant<int, ...> tag_type;
+   typedef boost::math::integral_constant<int, ...> tag_type;
...
@@ ... @@ inline typename tools::promote_args::type expm1(T x, const Policy& /* pol */)
#if defined(BOOST_HAS_EXPM1) && !(defined(__osf__) && defined(__DECCXX_VER))
#  ifdef BOOST_MATH_USE_C99
-inline float expm1(float x, const policies::policy<>&){ return ::expm1f(x); }
+BOOST_MATH_GPU_ENABLED inline float expm1(float x, const policies::policy<>&){ return ::expm1f(x); }
#  ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
inline long double expm1(long double x, const policies::policy<>&){ return ::expm1l(x); }
#  endif
#  else
inline float expm1(float x, const policies::policy<>&){ return static_cast<float>(::expm1(x)); }
#  endif
-inline double expm1(double x, const policies::policy<>&){ return ::expm1(x); }
+BOOST_MATH_GPU_ENABLED inline double expm1(double x, const policies::policy<>&){ return ::expm1(x); }
#endif

template <class T>
-inline typename tools::promote_args<T>::type expm1(T x)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type expm1(T x)
{
   return expm1(x, policies::policy<>());
}
@@ -313,6 +319,40 @@ inline typename tools::promote_args::type expm1(T x)
} // namespace math
} // namespace boost

+#else // Special handling for NVRTC
+
+namespace boost {
+namespace math {
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED auto expm1(T x)
+{
+   return ::expm1(x);
+}
+
+template <>
+BOOST_MATH_GPU_ENABLED auto expm1(float x)
+{
+   return ::expm1f(x);
+}
+
+template <typename T, typename Policy>
+BOOST_MATH_GPU_ENABLED auto expm1(T x, const Policy&)
+{
+   return ::expm1(x);
+}
+
+template <typename Policy>
+BOOST_MATH_GPU_ENABLED auto expm1(float x, const Policy&)
+{
+   return ::expm1f(x);
+}
+
+} // Namespace math
+} // Namespace boost
+
+#endif // BOOST_MATH_HAS_NVRTC
+
#endif // BOOST_MATH_HYPOT_INCLUDED
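The NVRTC branch above bypasses the policy machinery entirely and forwards to the CUDA math library, with a float specialization so single-precision arguments call ::expm1f rather than promoting to double. A reduced sketch of the pattern under those assumptions (device_expm1 is a hypothetical name, not part of the diff):

    // Hypothetical reduction of the NVRTC forwarding pattern used above: the
    // generic template calls the double-precision CUDA routine, while a full
    // specialization keeps float arguments in single precision via ::expm1f.
    template <typename T>
    __device__ T device_expm1(T x) { return ::expm1(x); }

    template <>
    __device__ float device_expm1<float>(float x) { return ::expm1f(x); }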
diff --git a/include/boost/math/special_functions/factorials.hpp b/include/boost/math/special_functions/factorials.hpp
index 7229635cb9..ec6978bdc5 100644
--- a/include/boost/math/special_functions/factorials.hpp
+++ b/include/boost/math/special_functions/factorials.hpp
@@ -10,10 +10,14 @@
#pragma once
#endif

-#include <...>
+#include <...>
+#include <...>
+#include <...>
+#include <...>
#include <...>
#include <...>
-#include <...>
+#include <...>
+
#ifdef _MSC_VER
#pragma warning(push) // Temporary until lexical cast fixed.
#pragma warning(disable: 4127 4701)
@@ -21,16 +25,14 @@
#ifdef _MSC_VER
#pragma warning(pop)
#endif
-#include <...>
-#include <...>

namespace boost { namespace math {

template <class T, class Policy>
-inline T factorial(unsigned i, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T factorial(unsigned i, const Policy& pol)
{
-   static_assert(!std::is_integral<T>::value, "Type T must not be an integral type");
+   static_assert(!boost::math::is_integral<T>::value, "Type T must not be an integral type");
   // factorial<unsigned int>(n) is not implemented
   // because it would overflow integral type T for too small n
   // to be useful. Use instead a floating-point type,
@@ -49,7 +51,7 @@ inline T factorial(unsigned i, const Policy& pol)
}

template <class T>
-inline T factorial(unsigned i)
+BOOST_MATH_GPU_ENABLED inline T factorial(unsigned i)
{
   return factorial<T>(i, policies::policy<>());
}
@@ -72,9 +74,9 @@ inline double factorial(unsigned i)
}
*/
template <class T, class Policy>
-T double_factorial(unsigned i, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T double_factorial(unsigned i, const Policy& pol)
{
-   static_assert(!std::is_integral<T>::value, "Type T must not be an integral type");
+   static_assert(!boost::math::is_integral<T>::value, "Type T must not be an integral type");
   BOOST_MATH_STD_USING // ADL lookup of std names
   if(i & 1)
   {
@@ -107,17 +109,20 @@ T double_factorial(unsigned i, const Policy& pol)
}

template <class T>
-inline T double_factorial(unsigned i)
+BOOST_MATH_GPU_ENABLED inline T double_factorial(unsigned i)
{
   return double_factorial<T>(i, policies::policy<>());
}

+// TODO(mborland): We do not currently have support for tgamma_delta_ratio
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+
namespace detail{

template <class T, class Policy>
T rising_factorial_imp(T x, int n, const Policy& pol)
{
-   static_assert(!std::is_integral<T>::value, "Type T must not be an integral type");
+   static_assert(!boost::math::is_integral<T>::value, "Type T must not be an integral type");
   if(x < 0)
   {
      //
@@ -165,7 +170,7 @@ T rising_factorial_imp(T x, int n, const Policy& pol)
template <class T, class Policy>
inline T falling_factorial_imp(T x, unsigned n, const Policy& pol)
{
-   static_assert(!std::is_integral<T>::value, "Type T must not be an integral type");
+   static_assert(!boost::math::is_integral<T>::value, "Type T must not be an integral type");
   BOOST_MATH_STD_USING // ADL of std names
   if(x == 0)
      return 0;
@@ -262,6 +267,8 @@ inline typename tools::promote_args::type
      static_cast<result_type>(x), n, pol);
}

+#endif // BOOST_MATH_HAS_GPU_SUPPORT
+
} // namespace math
} // namespace boost
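The static_assert added above rejects integral T for a practical reason: factorials overflow fixed-width integers almost immediately, so only floating-point result types are useful. A short illustration (the numeric facts about 32-bit int are exact; the snippet is not part of the diff):

    // 13! = 6'227'020'800 already exceeds INT32_MAX (2'147'483'647), so an
    // integral factorial<T> would be uselessly narrow; a double holds it exactly.
    #include <boost/math/special_functions/factorials.hpp>

    double f13 = boost::math::factorial<double>(13); // 6227020800.0
    // boost::math::factorial<int>(13);              // rejected by the static_assert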
diff --git a/include/boost/math/special_functions/fpclassify.hpp b/include/boost/math/special_functions/fpclassify.hpp
index 2c504d7ac8..0ac9470f28 100644
--- a/include/boost/math/special_functions/fpclassify.hpp
+++ b/include/boost/math/special_functions/fpclassify.hpp
@@ -1,5 +1,6 @@
//  Copyright John Maddock 2005-2008.
//  Copyright (c) 2006-2008 Johan Rade
+//  Copyright (c) 2024 Matt Borland
//  Use, modification and distribution are subject to the
//  Boost Software License, Version 1.0. (See accompanying file
//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -11,12 +12,17 @@
#pragma once
#endif

-#include <...>
-#include <...>
-#include <...>
+#include <...>
+
+#ifndef BOOST_MATH_HAS_NVRTC
+
#include <...>
#include <...>
#include <...>
+#include <...>
+#include <...>
+#include <...>
+
/*!
  \file fpclassify.hpp
  \brief Classify floating-point value as normal, subnormal, zero, infinite, or NaN.
@@ -76,6 +82,80 @@ is used.
*/

+#ifdef BOOST_MATH_HAS_GPU_SUPPORT
+
+namespace boost { namespace math {
+
+template<> inline BOOST_MATH_GPU_ENABLED bool (isnan)(float x) { return x != x; }
+template<> inline BOOST_MATH_GPU_ENABLED bool (isnan)(double x) { return x != x; }
+
+template<> inline BOOST_MATH_GPU_ENABLED bool (isinf)(float x) { return x > FLT_MAX || x < -FLT_MAX; }
+template<> inline BOOST_MATH_GPU_ENABLED bool (isinf)(double x) { return x > DBL_MAX || x < -DBL_MAX; }
+
+template<> inline BOOST_MATH_GPU_ENABLED bool (isfinite)(float x) { return !isnan(x) && !isinf(x); }
+template<> inline BOOST_MATH_GPU_ENABLED bool (isfinite)(double x) { return !isnan(x) && !isinf(x); }
+
+template<> inline BOOST_MATH_GPU_ENABLED bool (isnormal)(float x)
+{
+   if(x < 0) x = -x;
+   return (x >= FLT_MIN) && (x <= FLT_MAX);
+}
+template<> inline BOOST_MATH_GPU_ENABLED bool (isnormal)(double x)
+{
+   if(x < 0) x = -x;
+   return (x >= DBL_MIN) && (x <= DBL_MAX);
+}
+
+template<> inline BOOST_MATH_GPU_ENABLED int (fpclassify)(float t)
+{
+   if((boost::math::isnan)(t))
+      return FP_NAN;
+   // std::fabs broken on a few systems especially for long long!!!!
+   float at = (t < 0.0f) ? -t : t;
+
+   // Use a process of exclusion to figure out
+   // what kind of type we have, this relies on
+   // IEEE conforming reals that will treat
+   // Nan's as unordered. Some compilers
+   // don't do this once optimisations are
+   // turned on, hence the check for nan's above.
+   if(at <= FLT_MAX)
+   {
+      if(at >= FLT_MIN)
+         return FP_NORMAL;
+      return (at != 0) ? FP_SUBNORMAL : FP_ZERO;
+   }
+   else if(at > FLT_MAX)
+      return FP_INFINITE;
+   return FP_NAN;
+}
+
+template<> inline BOOST_MATH_GPU_ENABLED int (fpclassify)(double t)
+{
+   if((boost::math::isnan)(t))
+      return FP_NAN;
+   // std::fabs broken on a few systems especially for long long!!!!
+   double at = (t < 0.0) ? -t : t;
+
+   // Use a process of exclusion to figure out
+   // what kind of type we have, this relies on
+   // IEEE conforming reals that will treat
+   // Nan's as unordered. Some compilers
+   // don't do this once optimisations are
+   // turned on, hence the check for nan's above.
+   if(at <= DBL_MAX)
+   {
+      if(at >= DBL_MIN)
+         return FP_NORMAL;
+      return (at != 0) ? FP_SUBNORMAL : FP_ZERO;
+   }
+   else if(at > DBL_MAX)
+      return FP_INFINITE;
+   return FP_NAN;
+}
+
+#else
+
#if defined(_MSC_VER) || defined(BOOST_BORLANDC)
#include <...>
#endif
@@ -632,7 +712,86 @@ inline bool (isnan)(__float128 x)
}
#endif

+#endif
+
} // namespace math
} // namespace boost

+#else // Special handling generally using the CUDA library
+
+#include <...>
+
+namespace boost {
+namespace math {
+
+template <typename T, boost::math::enable_if_t<boost::math::is_integral_v<T>, bool> = true>
+inline BOOST_MATH_GPU_ENABLED bool isnan(T x)
+{
+   return false;
+}
+
+template <typename T, boost::math::enable_if_t<!boost::math::is_integral_v<T>, bool> = true>
+inline BOOST_MATH_GPU_ENABLED bool isnan(T x)
+{
+   return ::isnan(x);
+}
+
+template <typename T, boost::math::enable_if_t<boost::math::is_integral_v<T>, bool> = true>
+inline BOOST_MATH_GPU_ENABLED bool isinf(T x)
+{
+   return false;
+}
+
+template <typename T, boost::math::enable_if_t<!boost::math::is_integral_v<T>, bool> = true>
+inline BOOST_MATH_GPU_ENABLED bool isinf(T x)
+{
+   return ::isinf(x);
+}
+
+template <typename T, boost::math::enable_if_t<boost::math::is_integral_v<T>, bool> = true>
+inline BOOST_MATH_GPU_ENABLED bool isfinite(T x)
+{
+   return true;
+}
+
+template <typename T, boost::math::enable_if_t<!boost::math::is_integral_v<T>, bool> = true>
+inline BOOST_MATH_GPU_ENABLED bool isfinite(T x)
+{
+   return ::isfinite(x);
+}
+
+template <typename T>
+inline BOOST_MATH_GPU_ENABLED bool isnormal(T x)
+{
+   return x != static_cast<T>(0) && x != static_cast<T>(-0) &&
+          !boost::math::isnan(x) &&
+          !boost::math::isinf(x);
+}
+
+// We skip the check for FP_SUBNORMAL since they are not supported on these platforms
+template <typename T>
+inline BOOST_MATH_GPU_ENABLED int fpclassify(T x)
+{
+   if (boost::math::isnan(x))
+   {
+      return BOOST_MATH_FP_NAN;
+   }
+   else if (boost::math::isinf(x))
+   {
+      return BOOST_MATH_FP_INFINITE;
+   }
+   else if (x == static_cast<T>(0) || x == static_cast<T>(-0))
+   {
+      return BOOST_MATH_FP_ZERO;
+   }
+
+   return BOOST_MATH_FP_NORMAL;
+}
+
+} // Namespace math
+} // Namespace boost
+
+#endif // BOOST_MATH_HAS_NVRTC
+
#endif // BOOST_MATH_FPCLASSIFY_HPP
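The GPU fpclassify above works by exclusion because device code cannot rely on std::fpclassify: NaN is weeded out first via the unordered self-comparison, then the magnitude is bracketed against the type's limits. The same logic, restated as a standalone sketch for double (not part of the diff):

    // Standalone restatement of the process-of-exclusion classifier above.
    #include <cfloat>
    #include <cmath>   // FP_* classification macros

    inline int classify_double(double t)
    {
       if (t != t)        return FP_NAN;       // NaN compares unordered to itself
       double at = (t < 0.0) ? -t : t;
       if (at > DBL_MAX)  return FP_INFINITE;  // beyond the finite range
       if (at >= DBL_MIN) return FP_NORMAL;    // within the normal range
       return (at != 0) ? FP_SUBNORMAL : FP_ZERO;
    }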
diff --git a/include/boost/math/special_functions/gamma.hpp b/include/boost/math/special_functions/gamma.hpp
index a58ea3e693..4a15782c01 100644
--- a/include/boost/math/special_functions/gamma.hpp
+++ b/include/boost/math/special_functions/gamma.hpp
@@ -2,7 +2,7 @@
//  Copyright Paul A. Bristow 2007, 2013-14.
//  Copyright Nikhar Agrawal 2013-14
//  Copyright Christopher Kormanyos 2013-14, 2020, 2024
-
+// Copyright Matt Borland 2024.
//  Use, modification and distribution are subject to the
//  Boost Software License, Version 1.0. (See accompanying file
//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -14,12 +14,15 @@
#pragma once
#endif

+#include <...>
#include <...>
#include <...>
#include <...>
#include <...>
+#include <...>
+#include <...>
+#include <...>
#include <...>
-#include <...>
#include <...>
#include <...>
#include <...>
@@ -32,12 +35,12 @@
#include <...>
#include <...>
#include <...>
+
+// Only needed for types larger than double
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
#include <...>
#include <...>
-
-#include <...>
-#include <...>
-#include <...>
+#endif

#ifdef _MSC_VER
# pragma warning(push)
@@ -56,13 +59,13 @@
namespace boost{ namespace math{

namespace detail{

template <class T>
-inline bool is_odd(T v, const std::true_type&)
+BOOST_MATH_GPU_ENABLED inline bool is_odd(T v, const boost::math::true_type&)
{
   int i = static_cast<int>(v);
   return i&1;
}
template <class T>
-inline bool is_odd(T v, const std::false_type&)
+BOOST_MATH_GPU_ENABLED inline bool is_odd(T v, const boost::math::false_type&)
{
   // Oh dear can't cast T to int!
   BOOST_MATH_STD_USING
@@ -70,13 +73,13 @@ inline bool is_odd(T v, const std::false_type&)
   return static_cast<bool>(modulus != 0);
}
template <class T>
-inline bool is_odd(T v)
+BOOST_MATH_GPU_ENABLED inline bool is_odd(T v)
{
-   return is_odd(v, ::std::is_convertible<T, int>());
+   return is_odd(v, ::boost::math::is_convertible<T, int>());
}

template <class T>
-T sinpx(T z)
+BOOST_MATH_GPU_ENABLED T sinpx(T z)
{
// Ad hoc function calculates x * sin(pi * x),
// taking extra care near when x is near a whole number.
@@ -108,7 +111,7 @@ T sinpx(T z)
// tgamma(z), with Lanczos support:
//
template <class T, class Policy, class Lanczos>
-T gamma_imp(T z, const Policy& pol, const Lanczos& l)
+BOOST_MATH_GPU_ENABLED T gamma_imp_final(T z, const Policy& pol, const Lanczos&)
{
   BOOST_MATH_STD_USING
@@ -122,25 +125,13 @@ T gamma_imp(T z, const Policy& pol, const Lanczos& l)
      b = true;
   }
#endif
-   static const char* function = "boost::math::tgamma<%1%>(%1%)";
+   constexpr auto function = "boost::math::tgamma<%1%>(%1%)";
   if(z <= 0)
   {
      if(floor(z) == z)
-         return policies::raise_pole_error<T>(function, "Evaluation of tgamma at a negative integer %1%.", z, pol);
-      if(z <= -20)
      {
-         result = gamma_imp(T(-z), pol, l) * sinpx(z);
-         BOOST_MATH_INSTRUMENT_VARIABLE(result);
-         if((fabs(result) < 1) && (tools::max_value<T>() * fabs(result) < boost::math::constants::pi<T>()))
-            return -boost::math::sign(result) * policies::raise_overflow_error<T>(function, "Result of tgamma is too large to represent.", pol);
-         result = -boost::math::constants::pi<T>() / result;
-         if(result == 0)
-            return policies::raise_underflow_error<T>(function, "Result of tgamma is too small to represent.", pol);
-         if((boost::math::fpclassify)(result) == (int)FP_SUBNORMAL)
-            return policies::raise_denorm_error<T>(function, "Result of tgamma is denormalized.", result, pol);
-         BOOST_MATH_INSTRUMENT_VARIABLE(result);
-         return result;
+         return policies::raise_pole_error<T>(function, "Evaluation of tgamma at a negative integer %1%.", z, pol);
      }

   // shift z to > 1:
@@ -195,11 +186,52 @@ T gamma_imp(T z, const Policy& pol, const Lanczos& l)
   }
   return result;
}
+
+#ifdef BOOST_MATH_ENABLE_CUDA
+# pragma nv_diag_suppress 2190
+#endif
+
+// SYCL compilers can not support recursion so we extract it into a dispatch function
+template <class T, class Policy, class Lanczos>
+BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T gamma_imp(T z, const Policy& pol, const Lanczos& l)
+{
+   BOOST_MATH_STD_USING
+
+   T result = 1;
+   constexpr auto function = "boost::math::tgamma<%1%>(%1%)";
+
+   if(z <= 0)
+   {
+      if(floor(z) == z)
+         return policies::raise_pole_error<T>(function, "Evaluation of tgamma at a negative integer %1%.", z, pol);
+      if(z <= -20)
+      {
+         result = gamma_imp_final(T(-z), pol, l) * sinpx(z);
+         BOOST_MATH_INSTRUMENT_VARIABLE(result);
+         if((fabs(result) < 1) && (tools::max_value<T>() * fabs(result) < boost::math::constants::pi<T>()))
+            return -boost::math::sign(result) * policies::raise_overflow_error<T>(function, "Result of tgamma is too large to represent.", pol);
+         result = -boost::math::constants::pi<T>() / result;
+         if(result == 0)
+            return policies::raise_underflow_error<T>(function, "Result of tgamma is too small to represent.", pol);
+         if((boost::math::fpclassify)(result) == BOOST_MATH_FP_SUBNORMAL)
+            return policies::raise_denorm_error<T>(function, "Result of tgamma is denormalized.", result, pol);
+         BOOST_MATH_INSTRUMENT_VARIABLE(result);
+         return result;
+      }
+   }
+
+   return gamma_imp_final(T(z), pol, l);
+}
+
+#ifdef BOOST_MATH_ENABLE_CUDA
+# pragma nv_diag_default 2190
+#endif
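The gamma_imp split above is the recurring pattern of this patch: SYCL targets cannot compile recursion, so the rare reflection branch that used to re-enter gamma_imp is hoisted into a thin dispatcher that only ever calls a non-recursive *_final worker. A toy reduction of the idea (worker/worker_final are hypothetical names, not from the diff):

    // Toy sketch of the recursion-removal dispatch used for gamma_imp above.
    template <typename T>
    T worker_final(T z)
    {
       // main evaluation; never calls itself or worker()
       return z * z;
    }

    template <typename T>
    T worker(T z)
    {
       if (z < 0)                   // the rare branch that used to recurse
          return -worker_final(-z); // now a plain call into the worker
       return worker_final(z);
    }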
//
// lgamma(z) with Lanczos support:
//
template <class T, class Policy, class Lanczos>
-T lgamma_imp(T z, const Policy& pol, const Lanczos& l, int* sign = nullptr)
+BOOST_MATH_GPU_ENABLED T lgamma_imp_final(T z, const Policy& pol, const Lanczos& l, int* sign = nullptr)
{
#ifdef BOOST_MATH_INSTRUMENT
   static bool b = false;
@@ -212,29 +244,12 @@ T lgamma_imp(T z, const Policy& pol, const Lanczos& l, int* sign = nullptr)

   BOOST_MATH_STD_USING

-   static const char* function = "boost::math::lgamma<%1%>(%1%)";
+   constexpr auto function = "boost::math::lgamma<%1%>(%1%)";

   T result = 0;
   int sresult = 1;
-   if(z <= -tools::root_epsilon<T>())
-   {
-      // reflection formula:
-      if(floor(z) == z)
-         return policies::raise_pole_error<T>(function, "Evaluation of lgamma at a negative integer %1%.", z, pol);
-
-      T t = sinpx(z);
-      z = -z;
-      if(t < 0)
-      {
-         t = -t;
-      }
-      else
-      {
-         sresult = -sresult;
-      }
-      result = log(boost::math::constants::pi<T>()) - lgamma_imp(z, pol, l) - log(t);
-   }
-   else if (z < tools::root_epsilon<T>())
+
+   if (z < tools::root_epsilon<T>())
   {
      if (0 == z)
         return policies::raise_pole_error<T>(function, "Evaluation of lgamma at %1%.", z, pol);
@@ -248,7 +263,7 @@ T lgamma_imp(T z, const Policy& pol, const Lanczos& l, int* sign = nullptr)
   else if(z < 15)
   {
      typedef typename policies::precision<T, Policy>::type precision_type;
-      typedef std::integral_constant<int, ...> tag_type;
+      typedef boost::math::integral_constant<int, ...> tag_type;
      result = lgamma_small_imp<T>(z, T(z - 1), T(z - 2), tag_type(), pol, l);
   }
-   else if((z >= 3) && (z < 100) && (std::numeric_limits<T>::max_exponent >= 1024))
+   else if((z >= 3) && (z < 100) && (boost::math::numeric_limits<T>::max_exponent >= 1024))
   {
      // taking the log of tgamma reduces the error, no danger of overflow here:
      result = log(gamma_imp(z, pol, l));
@@ -279,6 +294,55 @@ T lgamma_imp(T z, const Policy& pol, const Lanczos& l, int* sign = nullptr)
   return result;
}

+#ifdef BOOST_MATH_ENABLE_CUDA
+# pragma nv_diag_suppress 2190
+#endif
+
+template <class T, class Policy, class Lanczos>
+BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T lgamma_imp(T z, const Policy& pol, const Lanczos& l, int* sign = nullptr)
+{
+   BOOST_MATH_STD_USING
+
+   if(z <= -tools::root_epsilon<T>())
+   {
+      constexpr auto function = "boost::math::lgamma<%1%>(%1%)";
+
+      T result = 0;
+      int sresult = 1;
+
+      // reflection formula:
+      if(floor(z) == z)
+         return policies::raise_pole_error<T>(function, "Evaluation of lgamma at a negative integer %1%.", z, pol);
+
+      T t = sinpx(z);
+      z = -z;
+      if(t < 0)
+      {
+         t = -t;
+      }
+      else
+      {
+         sresult = -sresult;
+      }
+      result = log(boost::math::constants::pi<T>()) - lgamma_imp_final(T(z), pol, l) - log(t);
+
+      if(sign)
+      {
+         *sign = sresult;
+      }
+
+      return result;
+   }
+   else
+   {
+      return lgamma_imp_final(T(z), pol, l, sign);
+   }
+}
+
+#ifdef BOOST_MATH_ENABLE_CUDA
+# pragma nv_diag_default 2190
+#endif
+
//
// Incomplete gamma functions follow:
//
@@ -289,14 +353,14 @@ struct upper_incomplete_gamma_fract
   T z, a;
   int k;
public:
-   typedef std::pair<T, T> result_type;
+   typedef boost::math::pair<T, T> result_type;

-   upper_incomplete_gamma_fract(T a1, T z1)
+   BOOST_MATH_GPU_ENABLED upper_incomplete_gamma_fract(T a1, T z1)
      : z(z1-a1+1), a(a1), k(0)
   {
   }

-   result_type operator()()
+   BOOST_MATH_GPU_ENABLED result_type operator()()
   {
      ++k;
      z += 2;
@@ -305,7 +369,7 @@ struct upper_incomplete_gamma_fract
};

template <class T>
-inline T upper_gamma_fraction(T a, T z, T eps)
+BOOST_MATH_GPU_ENABLED inline T upper_gamma_fraction(T a, T z, T eps)
{
   // Multiply result by z^a * e^-z to get the full
   // upper incomplete integral. Divide by tgamma(z)
@@ -321,9 +385,9 @@ struct lower_incomplete_gamma_series
   T a, z, result;
public:
   typedef T result_type;
-   lower_incomplete_gamma_series(T a1, T z1) : a(a1), z(z1), result(1){}
+   BOOST_MATH_GPU_ENABLED lower_incomplete_gamma_series(T a1, T z1) : a(a1), z(z1), result(1){}

-   T operator()()
+   BOOST_MATH_GPU_ENABLED T operator()()
   {
      T r = result;
      a += 1;
@@ -333,32 +397,34 @@ struct lower_incomplete_gamma_series
};

template <class T, class Policy>
-inline T lower_gamma_series(T a, T z, const Policy& pol, T init_value = 0)
+BOOST_MATH_GPU_ENABLED inline T lower_gamma_series(T a, T z, const Policy& pol, T init_value = 0)
{
   // Multiply result by ((z^a) * (e^-z) / a) to get the full
   // lower incomplete integral. Then divide by tgamma(a)
   // to get the normalised value.
   lower_incomplete_gamma_series<T> s(a, z);
-   std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
+   boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
   T factor = policies::get_epsilon<T, Policy>();
   T result = boost::math::tools::sum_series(s, factor, max_iter, init_value);
   policies::check_series_iterations<T>("boost::math::detail::lower_gamma_series<%1%>(%1%)", max_iter, pol);
   return result;
}

+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+
//
// Fully generic tgamma and lgamma use Stirling's approximation
// with Bernoulli numbers.
//
template <class T>
-std::size_t highest_bernoulli_index()
+boost::math::size_t highest_bernoulli_index()
{
-   const float digits10_of_type = (std::numeric_limits<T>::is_specialized
-                                     ? static_cast<float>(std::numeric_limits<T>::digits10)
-                                     : static_cast<float>(boost::math::tools::digits<T>() * 0.301F));
+   const float digits10_of_type = (boost::math::numeric_limits<T>::is_specialized
+                                     ? static_cast<float>(boost::math::numeric_limits<T>::digits10)
+                                     : static_cast<float>(boost::math::tools::digits<T>() * 0.301F));

   // Find the high index n for Bn to produce the desired precision in Stirling's calculation.
-   return static_cast<std::size_t>(18.0F + (0.6F * digits10_of_type));
+   return static_cast<boost::math::size_t>(18.0F + (0.6F * digits10_of_type));
}

template <class T>
@@ -366,8 +432,8 @@ int minimum_argument_for_bernoulli_recursion()
{
   BOOST_MATH_STD_USING

-   const float digits10_of_type = (std::numeric_limits<T>::is_specialized
-                                     ? (float) std::numeric_limits<T>::digits10
-                                     : (float) (boost::math::tools::digits<T>() * 0.301F));
+   const float digits10_of_type = (boost::math::numeric_limits<T>::is_specialized
+                                     ? (float) boost::math::numeric_limits<T>::digits10
+                                     : (float) (boost::math::tools::digits<T>() * 0.301F));

   int min_arg = (int) (digits10_of_type * 1.7F);
@@ -389,7 +455,7 @@ int minimum_argument_for_bernoulli_recursion()
      const float d2_minus_one = ((digits10_of_type / 0.301F) - 1.0F);
      const float limit = ceil(exp((d2_minus_one * log(2.0F)) / 20.0F));

-      min_arg = (int) ((std::min)(digits10_of_type * 1.7F, limit));
+      min_arg = (int) (BOOST_MATH_GPU_SAFE_MIN(digits10_of_type * 1.7F, limit));
   }

   return min_arg;
@@ -408,7 +474,7 @@ T scaled_tgamma_no_lanczos(const T& z, const Policy& pol, bool islog = false)

   // Perform the Bernoulli series expansion of Stirling's approximation.
-   const std::size_t number_of_bernoullis_b2n = policies::get_max_series_iterations<Policy>();
+   const boost::math::size_t number_of_bernoullis_b2n = policies::get_max_series_iterations<Policy>();

   T one_over_x_pow_two_n_minus_one = 1 / z;
   const T one_over_x2 = one_over_x_pow_two_n_minus_one * one_over_x_pow_two_n_minus_one;
@@ -417,11 +483,11 @@ T scaled_tgamma_no_lanczos(const T& z, const Policy& pol, bool islog = false)
   const T half_ln_two_pi_over_z = sqrt(boost::math::constants::two_pi<T>() / z);
   T last_term = 2 * sum;

-   for (std::size_t n = 2U;; ++n)
+   for (boost::math::size_t n = 2U;; ++n)
   {
      one_over_x_pow_two_n_minus_one *= one_over_x2;

-      const std::size_t n2 = static_cast<std::size_t>(n * 2U);
+      const boost::math::size_t n2 = static_cast<boost::math::size_t>(n * 2U);

      const T term = (boost::math::bernoulli_b2n<T>(static_cast<int>(n)) * one_over_x_pow_two_n_minus_one) / (n2 * (n2 - 1U));
@@ -460,7 +526,7 @@ T gamma_imp(T z, const Policy& pol, const lanczos::undefined_lanczos&)
{
   BOOST_MATH_STD_USING

-   static const char* function = "boost::math::tgamma<%1%>(%1%)";
+   constexpr auto function = "boost::math::tgamma<%1%>(%1%)";

   // Check if the argument of tgamma is identically zero.
   const bool is_at_zero = (z == 0);
@@ -569,7 +635,7 @@ T gamma_imp(T z, const Policy& pol, const lanczos::undefined_lanczos&)
      if(gamma_value == 0)
         return policies::raise_underflow_error<T>(function, "Result of tgamma is too small to represent.", pol);

-      if((boost::math::fpclassify)(gamma_value) == static_cast<int>(FP_SUBNORMAL))
+      if((boost::math::fpclassify)(gamma_value) == static_cast<int>(BOOST_MATH_FP_SUBNORMAL))
         return policies::raise_denorm_error<T>(function, "Result of tgamma is denormalized.", gamma_value, pol);
   }
@@ -610,7 +676,7 @@ T lgamma_imp(T z, const Policy& pol, const lanczos::undefined_lanczos&, int* sig
{
   BOOST_MATH_STD_USING

-   static const char* function = "boost::math::lgamma<%1%>(%1%)";
+   constexpr auto function = "boost::math::lgamma<%1%>(%1%)";

   // Check if the argument of lgamma is identically zero.
   const bool is_at_zero = (z == 0);
@@ -715,18 +781,33 @@ T lgamma_imp(T z, const Policy& pol, const lanczos::undefined_lanczos&, int* sig
   return log_gamma_value;
}

+#endif // BOOST_MATH_HAS_GPU_SUPPORT
+
+// In order for tgammap1m1_imp to compile we need a forward decl of boost::math::tgamma
+// The rub is that we can't just use math_fwd so we provide one here only in that circumstance
+#ifdef BOOST_MATH_HAS_NVRTC
+template <class RT>
+BOOST_MATH_GPU_ENABLED tools::promote_args_t<RT> tgamma(RT z);
+
+template <class RT1, class RT2>
+BOOST_MATH_GPU_ENABLED tools::promote_args_t<RT1, RT2> tgamma(RT1 a, RT2 z);
+
+template <class RT1, class RT2, class Policy>
+BOOST_MATH_GPU_ENABLED tools::promote_args_t<RT1, RT2> tgamma(RT1 a, RT2 z, const Policy& pol);
+#endif
+
//
// This helper calculates tgamma(dz+1)-1 without cancellation errors,
// used by the upper incomplete gamma with z < 1:
//
template <class T, class Policy, class Lanczos>
-T tgammap1m1_imp(T dz, Policy const& pol, const Lanczos& l)
+BOOST_MATH_GPU_ENABLED T tgammap1m1_imp(T dz, Policy const& pol, const Lanczos& l)
{
   BOOST_MATH_STD_USING

   typedef typename policies::precision<T, Policy>::type precision_type;
-   typedef std::integral_constant<int, ...> tag_type;
+   typedef boost::math::integral_constant<int, ...> tag_type;
...
template <class T, class Policy>
inline T tgammap1m1_imp(T z, Policy const& pol,
   const ::boost::math::lanczos::undefined_lanczos&)
@@ -781,6 +872,8 @@ inline T tgammap1m1_imp(T z, Policy const& pol,
   return boost::math::expm1(boost::math::lgamma(1 + z, pol));
}

+#endif // BOOST_MATH_HAS_GPU_SUPPORT
+
//
// Series representation for upper fraction when z is small:
//
@@ -789,9 +882,9 @@ struct small_gamma2_series
{
   typedef T result_type;

-   small_gamma2_series(T a_, T x_) : result(-x_), x(-x_), apn(a_+1), n(1){}
+   BOOST_MATH_GPU_ENABLED small_gamma2_series(T a_, T x_) : result(-x_), x(-x_), apn(a_+1), n(1){}

-   T operator()()
+   BOOST_MATH_GPU_ENABLED T operator()()
   {
      T r = result / (apn);
      result *= x;
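tgammap1m1_imp above exists to dodge catastrophic cancellation: for small dz, tgamma(1 + dz) is within rounding of 1, so subtracting 1 directly destroys most significant digits, while the expm1(lgamma(1 + z)) form used by the fallback keeps full relative precision because both expm1 and lgamma are accurate near zero. A host-side illustration via the public entry point (not part of the diff):

    // tgamma1pm1(dz) computes tgamma(1 + dz) - 1 without cancellation.
    #include <boost/math/special_functions/gamma.hpp>

    double naive = boost::math::tgamma(1.0 + 1e-12) - 1.0; // few correct digits
    double good  = boost::math::tgamma1pm1(1e-12);         // ~ -gamma_E * 1e-12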
@@ -809,7 +902,7 @@ struct small_gamma2_series
// incomplete gammas:
//
template <class T, class Policy>
-T full_igamma_prefix(T a, T z, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T full_igamma_prefix(T a, T z, const Policy& pol)
{
   BOOST_MATH_STD_USING
@@ -854,7 +947,7 @@ T full_igamma_prefix(T a, T z, const Policy& pol)
   // This error handling isn't very good: it happens after the fact
   // rather than before it...
   //
-   if((boost::math::fpclassify)(prefix) == (int)FP_INFINITE)
+   if((boost::math::fpclassify)(prefix) == (int)BOOST_MATH_FP_INFINITE)
      return policies::raise_overflow_error<T>("boost::math::detail::full_igamma_prefix<%1%>(%1%, %1%)", "Result of incomplete gamma function is too large to represent.", pol);

   return prefix;
@@ -864,7 +957,7 @@ T full_igamma_prefix(T a, T z, const Policy& pol)
// most if the error occurs in this function:
//
template <class T, class Policy, class Lanczos>
-T regularised_gamma_prefix(T a, T z, const Policy& pol, const Lanczos& l)
+BOOST_MATH_GPU_ENABLED T regularised_gamma_prefix(T a, T z, const Policy& pol, const Lanczos& l)
{
   BOOST_MATH_STD_USING
   if (z >= tools::max_value<T>())
@@ -911,16 +1004,16 @@ T regularised_gamma_prefix(T a, T z, const Policy& pol, const Lanczos& l)
   //
   T alz = a * log(z / agh);
   T amz = a - z;
-   if(((std::min)(alz, amz) <= tools::log_min_value<T>()) || ((std::max)(alz, amz) >= tools::log_max_value<T>()))
+   if((BOOST_MATH_GPU_SAFE_MIN(alz, amz) <= tools::log_min_value<T>()) || (BOOST_MATH_GPU_SAFE_MAX(alz, amz) >= tools::log_max_value<T>()))
   {
      T amza = amz / a;
-      if(((std::min)(alz, amz)/2 > tools::log_min_value<T>()) && ((std::max)(alz, amz)/2 < tools::log_max_value<T>()))
+      if((BOOST_MATH_GPU_SAFE_MIN(alz, amz)/2 > tools::log_min_value<T>()) && (BOOST_MATH_GPU_SAFE_MAX(alz, amz)/2 < tools::log_max_value<T>()))
      {
         // compute square root of the result and then square it:
         T sq = pow(z / agh, a / 2) * exp(amz / 2);
         prefix = sq * sq;
      }
-      else if(((std::min)(alz, amz)/4 > tools::log_min_value<T>()) && ((std::max)(alz, amz)/4 < tools::log_max_value<T>()) && (z > a))
+      else if((BOOST_MATH_GPU_SAFE_MIN(alz, amz)/4 > tools::log_min_value<T>()) && (BOOST_MATH_GPU_SAFE_MAX(alz, amz)/4 < tools::log_max_value<T>()) && (z > a))
      {
         // compute the 4th root of the result then square it twice:
         T sq = pow(z / agh, a / 4) * exp(amz / 4);
@@ -944,6 +1037,9 @@ T regularised_gamma_prefix(T a, T z, const Policy& pol, const Lanczos& l)
   prefix *= sqrt(agh / boost::math::constants::e<T>()) / Lanczos::lanczos_sum_expG_scaled(a);
   return prefix;
}
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+
//
// And again, without Lanczos support:
//
@@ -1013,18 +1109,28 @@ T regularised_gamma_prefix(T a, T z, const Policy& pol, const lanczos::undefined
      }
   }
}
+
+#endif // BOOST_MATH_HAS_GPU_SUPPORT
+
//
// Upper gamma fraction for very small a:
//
template <class T, class Policy>
-inline T tgamma_small_upper_part(T a, T x, const Policy& pol, T* pgam = 0, bool invert = false, T* pderivative = 0)
+BOOST_MATH_GPU_ENABLED inline T tgamma_small_upper_part(T a, T x, const Policy& pol, T* pgam = 0, bool invert = false, T* pderivative = 0)
{
   BOOST_MATH_STD_USING // ADL of std functions.
   //
   // Compute the full upper fraction (Q) when a is very small:
   //
+   #ifdef BOOST_MATH_HAS_NVRTC
+   typedef typename tools::promote_args<T>::type result_type;
+   typedef typename policies::evaluation<result_type, Policy>::type value_type;
+   typedef typename lanczos::lanczos<value_type, Policy>::type evaluation_type;
+   T result {detail::tgammap1m1_imp(static_cast<value_type>(a), pol, evaluation_type())};
+   #else
   T result { boost::math::tgamma1pm1(a, pol) };
+   #endif

   if(pgam)
      *pgam = (result + 1) / a;
@@ -1032,7 +1138,7 @@ inline T tgamma_small_upper_part(T a, T x, const Policy& pol, T* pgam = 0, bool
   result -= p;
   result /= a;
   detail::small_gamma2_series<T> s(a, x);
-   std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>() - 10;
+   boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>() - 10;
   p += 1;
   if(pderivative)
      *pderivative = p / (*pgam * exp(x));
@@ -1047,7 +1153,7 @@ inline T tgamma_small_upper_part(T a, T x, const Policy& pol, T* pgam = 0, bool
// Upper gamma fraction for integer a:
//
template <class T, class Policy>
-inline T finite_gamma_q(T a, T x, Policy const& pol, T* pderivative = 0)
+BOOST_MATH_GPU_ENABLED inline T finite_gamma_q(T a, T x, Policy const& pol, T* pderivative = 0)
{
   //
   // Calculates normalised Q when a is an integer:
@@ -1075,13 +1181,27 @@ inline T finite_gamma_q(T a, T x, Policy const& pol, T* pderivative = 0)
// Upper gamma fraction for half integer a:
//
template <class T, class Policy>
-T finite_half_gamma_q(T a, T x, T* p_derivative, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T finite_half_gamma_q(T a, T x, T* p_derivative, const Policy& pol)
{
   //
   // Calculates normalised Q when a is a half-integer:
   //
   BOOST_MATH_STD_USING
+
+   #ifdef BOOST_MATH_HAS_NVRTC
+   T e;
+   if (boost::math::is_same_v<T, float>)
+   {
+      e = ::erfcf(::sqrtf(x));
+   }
+   else
+   {
+      e = ::erfc(::sqrt(x));
+   }
+   #else
   T e = boost::math::erfc(sqrt(x), pol);
+   #endif
+
   if((e != 0) && (a > 1))
   {
      T term = exp(-x) / sqrt(constants::pi<T>() * x);
@@ -1115,9 +1235,9 @@ template <class T>
struct incomplete_tgamma_large_x_series
{
   typedef T result_type;
-   incomplete_tgamma_large_x_series(const T& a, const T& x)
+   BOOST_MATH_GPU_ENABLED incomplete_tgamma_large_x_series(const T& a, const T& x)
      : a_poch(a - 1), z(x), term(1) {}
-   T operator()()
+   BOOST_MATH_GPU_ENABLED T operator()()
   {
      T result = term;
      term *= a_poch / z;
@@ -1128,11 +1248,11 @@ struct incomplete_tgamma_large_x_series
};

template <class T, class Policy>
-T incomplete_tgamma_large_x(const T& a, const T& x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T incomplete_tgamma_large_x(const T& a, const T& x, const Policy& pol)
{
   BOOST_MATH_STD_USING
   incomplete_tgamma_large_x_series<T> s(a, x);
-   std::uintmax_t max_iter = boost::math::policies::get_max_series_iterations<Policy>();
+   boost::math::uintmax_t max_iter = boost::math::policies::get_max_series_iterations<Policy>();
   T result = boost::math::tools::sum_series(s, boost::math::policies::get_epsilon<T, Policy>(), max_iter);
   boost::math::policies::check_series_iterations<T>("boost::math::tgamma<%1%>(%1%,%1%)", max_iter, pol);
   return result;
}
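incomplete_tgamma_large_x above sums the large-x asymptotic expansion of the upper incomplete gamma, tgamma(a, x) ~ x^(a-1) e^(-x) [1 + (a-1)/x + (a-1)(a-2)/x^2 + ...]; each operator() call yields the next bracketed term, with `term` carrying the running product and `a_poch` walking the falling factorial. A bare-bones restatement of the recurrence (illustrative helper, not part of the diff):

    #include <cmath>

    // Fixed-term restatement of the series summed by incomplete_tgamma_large_x.
    double upper_gamma_large_x(double a, double x, int terms)
    {
       double sum = 0, term = 1, a_poch = a - 1;
       for (int n = 0; n < terms; ++n)
       {
          sum    += term;
          term   *= a_poch / x;  // next factor: (a - 1 - n) / x
          a_poch -= 1;
       }
       return std::pow(x, a - 1) * std::exp(-x) * sum; // ~ tgamma(a, x), x large
    }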
@@ -1143,10 +1263,10 @@ T incomplete_tgamma_large_x(const T& a, const T& x, const Policy& pol)
//
// Main incomplete gamma entry point, handles all four incomplete gamma's:
//
template <class T, class Policy>
-T gamma_incomplete_imp(T a, T x, bool normalised, bool invert,
+BOOST_MATH_GPU_ENABLED T gamma_incomplete_imp_final(T a, T x, bool normalised, bool invert,
                       const Policy& pol, T* p_derivative)
{
-   static const char* function = "boost::math::gamma_p<%1%>(%1%, %1%)";
+   constexpr auto function = "boost::math::gamma_p<%1%>(%1%, %1%)";
   if(a <= 0)
      return policies::raise_domain_error<T>(function, "Argument a to the incomplete gamma function must be greater than zero (got a=%1%).", a, pol);
   if(x < 0)
      return policies::raise_domain_error<T>(function, "Argument x to the incomplete gamma function must be >= 0 (got x=%1%).", x, pol);

   BOOST_MATH_STD_USING

   T result = 0; // Just to avoid warning C4701: potentially uninitialized local variable 'result' used

-   if(a >= max_factorial<T>::value && !normalised)
-   {
-      //
-      // When we're computing the non-normalized incomplete gamma
-      // and a is large the result is rather hard to compute unless
-      // we use logs. There are really two options - if x is a long
-      // way from a in value then we can reliably use methods 2 and 4
-      // below in logarithmic form and go straight to the result.
-      // Otherwise we let the regularized gamma take the strain
-      // (the result is unlikely to underflow in the central region anyway)
-      // and combine with lgamma in the hopes that we get a finite result.
-      //
-      if(invert && (a * 4 < x))
-      {
-         // This is method 4 below, done in logs:
-         result = a * log(x) - x;
-         if(p_derivative)
-            *p_derivative = exp(result);
-         result += log(upper_gamma_fraction(a, x, policies::get_epsilon<T, Policy>()));
-      }
-      else if(!invert && (a > 4 * x))
-      {
-         // This is method 2 below, done in logs:
-         result = a * log(x) - x;
-         if(p_derivative)
-            *p_derivative = exp(result);
-         T init_value = 0;
-         result += log(detail::lower_gamma_series(a, x, pol, init_value) / a);
-      }
-      else
-      {
-         result = gamma_incomplete_imp(a, x, true, invert, pol, p_derivative);
-         if(result == 0)
-         {
-            if(invert)
-            {
-               // Try http://functions.wolfram.com/06.06.06.0039.01
-               result = 1 + 1 / (12 * a) + 1 / (288 * a * a);
-               result = log(result) - a + (a - 0.5f) * log(a) + log(boost::math::constants::root_two_pi<T>());
-               if(p_derivative)
-                  *p_derivative = exp(a * log(x) - x);
-            }
-            else
-            {
-               // This is method 2 below, done in logs, we're really outside the
-               // range of this method, but since the result is almost certainly
-               // infinite, we should probably be OK:
-               result = a * log(x) - x;
-               if(p_derivative)
-                  *p_derivative = exp(result);
-               T init_value = 0;
-               result += log(detail::lower_gamma_series(a, x, pol, init_value) / a);
-            }
-         }
-         else
-         {
-            result = log(result) + boost::math::lgamma(a, pol);
-         }
-      }
-      if(result > tools::log_max_value<T>())
-         return policies::raise_overflow_error<T>(function, nullptr, pol);
-      return exp(result);
-   }
-
   BOOST_MATH_ASSERT((p_derivative == nullptr) || normalised);

   bool is_int, is_half_int;
@@ -1297,7 +1353,7 @@ T gamma_incomplete_imp(T a, T x, bool normalised, bool invert,
   // series and continued fractions are slow to converge:
   //
   bool use_temme = false;
-   if(normalised && std::numeric_limits<T>::is_specialized && (a > 20))
+   if(normalised && boost::math::numeric_limits<T>::is_specialized && (a > 20))
   {
      T sigma = fabs((x-a)/a);
      if((a > 200) && (policies::digits<T, Policy>() <= 113))
@@ -1354,14 +1410,40 @@ T gamma_incomplete_imp(T a, T x, bool normalised, bool invert,
      {
         result = finite_gamma_q(a, x, pol, p_derivative);
         if(!normalised)
+         {
+            #ifdef BOOST_MATH_HAS_NVRTC
+            if (boost::math::is_same_v<T, float>)
+            {
+               result *= ::tgammaf(a);
+            }
+            else
+            {
+               result *= ::tgamma(a);
+            }
+            #else
            result *= boost::math::tgamma(a, pol);
+            #endif
+         }
         break;
      }
      case 1:
      {
         result = finite_half_gamma_q(a, x, p_derivative, pol);
         if(!normalised)
+         {
+            #ifdef BOOST_MATH_HAS_NVRTC
+            if (boost::math::is_same_v<T, float>)
+            {
+               result *= ::tgammaf(a);
+            }
+            else
+            {
+               result *= ::tgamma(a);
+            }
+            #else
            result *= boost::math::tgamma(a, pol);
+            #endif
+         }
         if(p_derivative && (*p_derivative == 0))
            *p_derivative = regularised_gamma_prefix(a, x, pol, lanczos_type());
         break;
      }
@@ -1390,7 +1472,19 @@ T gamma_incomplete_imp(T a, T x, bool normalised, bool invert,
        bool optimised_invert = false;
        if(invert)
        {
+           #ifdef BOOST_MATH_HAS_NVRTC
+           if (boost::math::is_same_v<T, float>)
+           {
+              init_value = (normalised ? 1 : ::tgammaf(a));
+           }
+           else
+           {
+              init_value = (normalised ? 1 : ::tgamma(a));
+           }
+           #else
           init_value = (normalised ? 1 : boost::math::tgamma(a, pol));
+           #endif
+
           if(normalised || (result >= 1) || (tools::max_value<T>() * result > init_value))
           {
              init_value /= result;
@@ -1447,14 +1541,14 @@ T gamma_incomplete_imp(T a, T x, bool normalised, bool invert,
        //
        typedef typename policies::precision<T, Policy>::type precision_type;

-        typedef std::integral_constant<int, ...> tag_type;
-        result = igamma_temme_large(a, x, pol, static_cast<tag_type const*>(nullptr));
+        typedef boost::math::integral_constant<int, ...> tag_type;
+        result = igamma_temme_large(a, x, pol, tag_type());

        if(x >= a)
           invert = !invert;
        if(p_derivative)
@@ -1473,7 +1567,18 @@ T gamma_incomplete_imp(T a, T x, bool normalised, bool invert,
#ifndef BOOST_MATH_NO_EXCEPTIONS
        try
        {
#endif
+           #ifdef BOOST_MATH_HAS_NVRTC
+           if (boost::math::is_same_v<T, float>)
+           {
+              result = ::powf(x, a) / ::tgammaf(a + 1);
+           }
+           else
+           {
+              result = ::pow(x, a) / ::tgamma(a + 1);
+           }
+           #else
           result = pow(x, a) / boost::math::tgamma(a + 1, pol);
+           #endif
#ifndef BOOST_MATH_NO_EXCEPTIONS
        }
        catch (const std::overflow_error&)
@@ -1505,7 +1610,19 @@ T gamma_incomplete_imp(T a, T x, bool normalised, bool invert,
        result = 1;
        if(invert)
        {
+           #ifdef BOOST_MATH_HAS_NVRTC
+           T gam;
+           if (boost::math::is_same_v<T, float>)
+           {
+              gam = normalised ? 1 : ::tgammaf(a);
+           }
+           else
+           {
+              gam = normalised ? 1 : ::tgamma(a);
+           }
+           #else
           T gam = normalised ? 1 : boost::math::tgamma(a, pol);
+           #endif
           result = gam - result;
        }
        if(p_derivative)
@@ -1525,36 +1642,109 @@ T gamma_incomplete_imp(T a, T x, bool normalised, bool invert,
   return result;
}

-//
-// Ratios of two gamma functions:
-//
-template <class T, class Policy, class Lanczos>
-T tgamma_delta_ratio_imp_lanczos(T z, T delta, const Policy& pol, const Lanczos& l)
+// Need to implement this dispatch to avoid recursion for device compilers
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED T gamma_incomplete_imp(T a, T x, bool normalised, bool invert,
+                                              const Policy& pol, T* p_derivative)
{
+   constexpr auto function = "boost::math::gamma_p<%1%>(%1%, %1%)";
+   if(a <= 0)
+      return policies::raise_domain_error<T>(function, "Argument a to the incomplete gamma function must be greater than zero (got a=%1%).", a, pol);
+   if(x < 0)
+      return policies::raise_domain_error<T>(function, "Argument x to the incomplete gamma function must be >= 0 (got x=%1%).", x, pol);
+
   BOOST_MATH_STD_USING
-   if(z < tools::epsilon<T>())
+
+   T result = 0; // Just to avoid warning C4701: potentially uninitialized local variable 'result' used
+
+   if(a >= max_factorial<T>::value && !normalised)
   {
      //
-      // We get spurious numeric overflow unless we're very careful, this
-      // can occur either inside Lanczos::lanczos_sum(z) or in the
-      // final combination of terms, to avoid this, split the product up
-      // into 2 (or 3) parts:
-      //
-      // G(z) / G(L) = 1 / (z * G(L)) ; z < eps, L = z + delta = delta
-      //    z * G(L) = z * G(lim) * (G(L)/G(lim)) ; lim = largest factorial
+      // When we're computing the non-normalized incomplete gamma
+      // and a is large the result is rather hard to compute unless
+      // we use logs. There are really two options - if x is a long
+      // way from a in value then we can reliably use methods 2 and 4
+      // below in logarithmic form and go straight to the result.
+      // Otherwise we let the regularized gamma take the strain
+      // (the result is unlikely to underflow in the central region anyway)
+      // and combine with lgamma in the hopes that we get a finite result.
+      //
-      if(boost::math::max_factorial<T>::value < delta)
+      if(invert && (a * 4 < x))
      {
-         T ratio = tgamma_delta_ratio_imp_lanczos(delta, T(boost::math::max_factorial<T>::value - delta), pol, l);
-         ratio *= z;
-         ratio *= boost::math::unchecked_factorial<T>(boost::math::max_factorial<T>::value - 1);
-         return 1 / ratio;
+         // This is method 4 below, done in logs:
+         result = a * log(x) - x;
+         if(p_derivative)
+            *p_derivative = exp(result);
+         result += log(upper_gamma_fraction(a, x, policies::get_epsilon<T, Policy>()));
+      }
+      else if(!invert && (a > 4 * x))
+      {
+         // This is method 2 below, done in logs:
+         result = a * log(x) - x;
+         if(p_derivative)
+            *p_derivative = exp(result);
+         T init_value = 0;
+         result += log(detail::lower_gamma_series(a, x, pol, init_value) / a);
      }
      else
      {
-         return 1 / (z * boost::math::tgamma(z + delta, pol));
+         result = gamma_incomplete_imp_final(T(a), T(x), true, invert, pol, p_derivative);
+         if(result == 0)
+         {
+            if(invert)
+            {
+               // Try http://functions.wolfram.com/06.06.06.0039.01
+               result = 1 + 1 / (12 * a) + 1 / (288 * a * a);
+               result = log(result) - a + (a - 0.5f) * log(a) + log(boost::math::constants::root_two_pi<T>());
+               if(p_derivative)
+                  *p_derivative = exp(a * log(x) - x);
+            }
+            else
+            {
+               // This is method 2 below, done in logs, we're really outside the
+               // range of this method, but since the result is almost certainly
+               // infinite, we should probably be OK:
+               result = a * log(x) - x;
+               if(p_derivative)
+                  *p_derivative = exp(result);
+               T init_value = 0;
+               result += log(detail::lower_gamma_series(a, x, pol, init_value) / a);
+            }
+         }
+         else
+         {
+            #ifdef BOOST_MATH_HAS_NVRTC
+            if (boost::math::is_same_v<T, float>)
+            {
+               result = ::logf(result) + ::lgammaf(a);
+            }
+            else
+            {
+               result = ::log(result) + ::lgamma(a);
+            }
+            #else
+            result = log(result) + boost::math::lgamma(a, pol);
+            #endif
+         }
      }
+      if(result > tools::log_max_value<T>())
+         return policies::raise_overflow_error<T>(function, nullptr, pol);
+      return exp(result);
   }
+
+   // If no special handling is required then we proceed as normal
+   return gamma_incomplete_imp_final(T(a), T(x), normalised, invert, pol, p_derivative);
+}
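gamma_incomplete_imp and its _final worker serve all four public incomplete gamma entry points, selected by the normalised/invert flags. For orientation, the mapping in host-side terms (values illustrative, API is Boost.Math's):

    // The four public faces of gamma_incomplete_imp:
    #include <boost/math/special_functions/gamma.hpp>

    double a = 3.0, x = 2.0;
    double full_upper = boost::math::tgamma(a, x);       // tgamma(a, x)
    double full_lower = boost::math::tgamma_lower(a, x); // tgamma(a) - tgamma(a, x)
    double reg_upper  = boost::math::gamma_q(a, x);      // Q(a, x) = tgamma(a, x) / tgamma(a)
    double reg_lower  = boost::math::gamma_p(a, x);      // P(a, x) = 1 - Q(a, x)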
+//
+// Ratios of two gamma functions:
+//
+template <class T, class Policy, class Lanczos>
+BOOST_MATH_GPU_ENABLED T tgamma_delta_ratio_imp_lanczos_final(T z, T delta, const Policy& pol, const Lanczos&)
+{
+   BOOST_MATH_STD_USING
+
   T zgh = static_cast<T>(z + T(Lanczos::g()) - constants::half<T>());
   T result;
   if(z + delta == z)
@@ -1588,9 +1778,55 @@ T tgamma_delta_ratio_imp_lanczos(T z, T delta, const Policy& pol, const Lanczos&
   result *= pow(T(constants::e<T>() / (zgh + delta)), delta);
   return result;
}
+
+template <class T, class Policy, class Lanczos>
+BOOST_MATH_GPU_ENABLED T tgamma_delta_ratio_imp_lanczos(T z, T delta, const Policy& pol, const Lanczos& l)
+{
+   BOOST_MATH_STD_USING
+
+   if(z < tools::epsilon<T>())
+   {
+      //
+      // We get spurious numeric overflow unless we're very careful, this
+      // can occur either inside Lanczos::lanczos_sum(z) or in the
+      // final combination of terms, to avoid this, split the product up
+      // into 2 (or 3) parts:
+      //
+      // G(z) / G(L) = 1 / (z * G(L)) ; z < eps, L = z + delta = delta
+      //    z * G(L) = z * G(lim) * (G(L)/G(lim)) ; lim = largest factorial
+      //
+      if(boost::math::max_factorial<T>::value < delta)
+      {
+         T ratio = tgamma_delta_ratio_imp_lanczos_final(T(delta), T(boost::math::max_factorial<T>::value - delta), pol, l);
+         ratio *= z;
+         ratio *= boost::math::unchecked_factorial<T>(boost::math::max_factorial<T>::value - 1);
+         return 1 / ratio;
+      }
+      else
+      {
+         #ifdef BOOST_MATH_HAS_NVRTC
+         if (boost::math::is_same_v<T, float>)
+         {
+            return 1 / (z * ::tgammaf(z + delta));
+         }
+         else
+         {
+            return 1 / (z * ::tgamma(z + delta));
+         }
+         #else
+         return 1 / (z * boost::math::tgamma(z + delta, pol));
+         #endif
+      }
+   }
+
+   return tgamma_delta_ratio_imp_lanczos_final(T(z), T(delta), pol, l);
+}
+
//
// And again without Lanczos support this time:
//
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+
template <class T, class Policy>
T tgamma_delta_ratio_imp_lanczos(T z, T delta, const Policy& pol, const lanczos::undefined_lanczos& l)
{
@@ -1647,15 +1883,28 @@ T tgamma_delta_ratio_imp_lanczos(T z, T delta, const Policy& pol, const lanczos:
   return ratio;
}

+#endif
+
template <class T, class Policy>
-T tgamma_delta_ratio_imp(T z, T delta, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T tgamma_delta_ratio_imp(T z, T delta, const Policy& pol)
{
   BOOST_MATH_STD_USING

   if((z <= 0) || (z + delta <= 0))
   {
      // This isn't very sophisticated, or accurate, but it does work:
+      #ifdef BOOST_MATH_HAS_NVRTC
+      if (boost::math::is_same_v<T, float>)
+      {
+         return ::tgammaf(z) / ::tgammaf(z + delta);
+      }
+      else
+      {
+         return ::tgamma(z) / ::tgamma(z + delta);
+      }
+      #else
      return boost::math::tgamma(z, pol) / boost::math::tgamma(z + delta, pol);
+      #endif
   }

   if(floor(delta) == delta)
@@ -1706,7 +1955,7 @@ T tgamma_delta_ratio_imp(T z, T delta, const Policy& pol)
}

template <class T, class Policy>
-T tgamma_ratio_imp(T x, T y, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T tgamma_ratio_imp(T x, T y, const Policy& pol)
{
   BOOST_MATH_STD_USING

@@ -1715,17 +1964,32 @@ T tgamma_ratio_imp(T x, T y, const Policy& pol)
   if((y <= 0) || (boost::math::isinf)(y))
      return policies::raise_domain_error<T>("boost::math::tgamma_ratio<%1%>(%1%, %1%)", "Gamma function ratios only implemented for positive arguments (got b=%1%).", y, pol);

+   // We don't need to worry about the denorm case on device
+   // And this has the added bonus of removing recursion
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
   if(x <= tools::min_value<T>())
   {
      // Special case for denorms...Ugh.
      T shift = ldexp(T(1), tools::digits<T>());
      return shift * tgamma_ratio_imp(T(x * shift), y, pol);
   }
+   #endif

   if((x < max_factorial<T>::value) && (y < max_factorial<T>::value))
   {
      // Rather than subtracting values, lets just call the gamma functions directly:
+      #ifdef BOOST_MATH_HAS_NVRTC
+      if (boost::math::is_same_v<T, float>)
+      {
+         return ::tgammaf(x) / ::tgammaf(y);
+      }
+      else
+      {
+         return ::tgamma(x) / ::tgamma(y);
+      }
+      #else
      return boost::math::tgamma(x, pol) / boost::math::tgamma(y, pol);
+      #endif
   }
   T prefix = 1;
   if(x < 1)
@@ -1741,12 +2005,35 @@ T tgamma_ratio_imp(T x, T y, const Policy& pol)
         y -= 1;
         prefix /= y;
      }
+
+      #ifdef BOOST_MATH_HAS_NVRTC
+      if (boost::math::is_same_v<T, float>)
+      {
+         return prefix * ::tgammaf(x) / ::tgammaf(y);
+      }
+      else
+      {
+         return prefix * ::tgamma(x) / ::tgamma(y);
+      }
+      #else
      return prefix * boost::math::tgamma(x, pol) / boost::math::tgamma(y, pol);
+      #endif
   }
   //
   // result is almost certainly going to underflow to zero, try logs just in case:
   //
+   #ifdef BOOST_MATH_HAS_NVRTC
+   if (boost::math::is_same_v<T, float>)
+   {
+      return ::expf(::lgammaf(x) - ::lgammaf(y));
+   }
+   else
+   {
+      return ::exp(::lgamma(x) - ::lgamma(y));
+   }
+   #else
   return exp(boost::math::lgamma(x, pol) - boost::math::lgamma(y, pol));
+   #endif
   }
   if(y < 1)
   {
@@ -1761,21 +2048,48 @@ T tgamma_ratio_imp(T x, T y, const Policy& pol)
        x -= 1;
        prefix *= x;
     }
+
+      #ifdef BOOST_MATH_HAS_NVRTC
+      if (boost::math::is_same_v<T, float>)
+      {
+         return prefix * ::tgammaf(x) / ::tgammaf(y);
+      }
+      else
+      {
+         return prefix * ::tgamma(x) / ::tgamma(y);
+      }
+      #else
      return prefix * boost::math::tgamma(x, pol) / boost::math::tgamma(y, pol);
+      #endif
   }
   //
   // Result will almost certainly overflow, try logs just in case:
   //
+   #ifdef BOOST_MATH_HAS_NVRTC
+   if (boost::math::is_same_v<T, float>)
+   {
+      return ::expf(::lgammaf(x) - ::lgammaf(y));
+   }
+   else
+   {
+      return ::exp(::lgamma(x) - ::lgamma(y));
+   }
+   #else
   return exp(boost::math::lgamma(x, pol) - boost::math::lgamma(y, pol));
+   #endif
}
//
// Regular case, x and y both large and similar in magnitude:
//
+   #ifdef BOOST_MATH_HAS_NVRTC
+   return detail::tgamma_delta_ratio_imp(x, y - x, pol);
+   #else
   return boost::math::tgamma_delta_ratio(x, y - x, pol);
+   #endif
}

template <class T, class Policy>
-T gamma_p_derivative_imp(T a, T x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T gamma_p_derivative_imp(T a, T x, const Policy& pol)
{
   BOOST_MATH_STD_USING
   //
@@ -1806,7 +2120,18 @@ T gamma_p_derivative_imp(T a, T x, const Policy& pol)
   if(f1 == 0)
   {
      // Underflow in calculation, use logs instead:
+      #ifdef BOOST_MATH_HAS_NVRTC
+      if (boost::math::is_same_v<T, float>)
+      {
+         f1 = a * ::logf(x) - x - ::lgammaf(a) - ::logf(x);
+      }
+      else
+      {
+         f1 = a * ::log(x) - x - ::lgamma(a) - ::log(x);
+      }
+      #else
      f1 = a * log(x) - x - lgamma(a, pol) - log(x);
+      #endif
      f1 = exp(f1);
   }
   else
@@ -1816,8 +2141,8 @@ T gamma_p_derivative_imp(T a, T x, const Policy& pol)
}

template <class T, class Policy>
-inline typename tools::promote_args<T>::type
-   tgamma(T z, const Policy& /* pol */, const std::true_type)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
+   tgamma(T z, const Policy& /* pol */, const boost::math::true_type)
{
   BOOST_FPU_EXCEPTION_GUARD
   typedef typename tools::promote_args<T>::type result_type;
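gamma_p_derivative_imp above evaluates d/dx P(a, x) = x^(a-1) e^(-x) / tgamma(a); the log-space fallback it switches to on underflow is exp(a*log(x) - x - lgamma(a) - log(x)). A quick host-side check of the identity (not part of the diff):

    // d/dx P(a, x) = x^(a-1) * exp(-x) / tgamma(a); for a = 3, x = 2 this is
    // 4 * exp(-2) / 2 = 2 * exp(-2) ~= 0.27067.
    #include <boost/math/special_functions/gamma.hpp>
    #include <cmath>

    double d        = boost::math::gamma_p_derivative(3.0, 2.0);
    double expected = std::pow(2.0, 2.0) * std::exp(-2.0) / boost::math::tgamma(3.0);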
call // our initialization code here as the precision presumably // varies at runtime, and will not have been set yet. Plus the // code requiring initialization isn't called when digits == 0. - if(std::numeric_limits::digits) + if (boost::math::numeric_limits::digits) { boost::math::gamma_p(static_cast(400), static_cast(400), Policy()); } } - static void do_init(const std::integral_constant&){} - void force_instantiate()const{} + BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant&){} + BOOST_MATH_GPU_ENABLED void force_instantiate()const{} }; - static const init initializer; - static void force_instantiate() + BOOST_MATH_STATIC const init initializer; + BOOST_MATH_GPU_ENABLED static void force_instantiate() { + #ifndef BOOST_MATH_HAS_GPU_SUPPORT initializer.force_instantiate(); + #endif } }; @@ -1880,10 +2207,10 @@ struct lgamma_initializer { struct init { - init() + BOOST_MATH_GPU_ENABLED init() { typedef typename policies::precision::type precision_type; - typedef std::integral_constant&) + BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant&) { boost::math::lgamma(static_cast(2.5), Policy()); boost::math::lgamma(static_cast(1.25), Policy()); boost::math::lgamma(static_cast(1.75), Policy()); } - static void do_init(const std::integral_constant&) + BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant&) { boost::math::lgamma(static_cast(2.5), Policy()); boost::math::lgamma(static_cast(1.25), Policy()); boost::math::lgamma(static_cast(1.5), Policy()); boost::math::lgamma(static_cast(1.75), Policy()); } - static void do_init(const std::integral_constant&) + BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant&) { } - void force_instantiate()const{} + BOOST_MATH_GPU_ENABLED void force_instantiate()const{} }; - static const init initializer; - static void force_instantiate() + BOOST_MATH_STATIC const init initializer; + BOOST_MATH_GPU_ENABLED static void force_instantiate() { + #ifndef BOOST_MATH_HAS_GPU_SUPPORT initializer.force_instantiate(); + #endif } }; @@ -1920,8 +2249,8 @@ template const typename lgamma_initializer::init lgamma_initializer::initializer; template -inline tools::promote_args_t - tgamma(T1 a, T2 z, const Policy&, const std::false_type) +BOOST_MATH_GPU_ENABLED inline tools::promote_args_t + tgamma(T1 a, T2 z, const Policy&, const boost::math::false_type) { BOOST_FPU_EXCEPTION_GUARD typedef tools::promote_args_t result_type; @@ -1943,8 +2272,8 @@ inline tools::promote_args_t } template -inline tools::promote_args_t - tgamma(T1 a, T2 z, const std::false_type& tag) +BOOST_MATH_GPU_ENABLED inline tools::promote_args_t + tgamma(T1 a, T2 z, const boost::math::false_type& tag) { return tgamma(a, z, policies::policy<>(), tag); } @@ -1952,15 +2281,8 @@ inline tools::promote_args_t } // namespace detail -template -inline typename tools::promote_args::type - tgamma(T z) -{ - return tgamma(z, policies::policy<>()); -} - template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type lgamma(T z, int* sign, const Policy&) { BOOST_FPU_EXCEPTION_GUARD @@ -1980,28 +2302,28 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type lgamma(T z, int* sign) { return lgamma(z, sign, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type 
   lgamma(T x, const Policy& pol)
 {
    return ::boost::math::lgamma(x, nullptr, pol);
 }

 template <class T>
-inline typename tools::promote_args<T>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
    lgamma(T x)
 {
    return ::boost::math::lgamma(x, nullptr, policies::policy<>());
 }

 template <class T, class Policy>
-inline typename tools::promote_args<T>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
    tgamma1pm1(T z, const Policy& /* pol */)
 {
    BOOST_FPU_EXCEPTION_GUARD
@@ -2015,11 +2337,11 @@ inline typename tools::promote_args<T>::type
       policies::discrete_quantile<>,
       policies::assert_undefined<> >::type forwarding_policy;

-   return policies::checked_narrowing_cast::type, forwarding_policy>(detail::tgammap1m1_imp(static_cast(z), forwarding_policy(), evaluation_type()), "boost::math::tgamma1pm1<%!%>(%1%)");
+   return policies::checked_narrowing_cast::type, forwarding_policy>(detail::tgammap1m1_imp(static_cast(z), forwarding_policy(), evaluation_type()), "boost::math::tgamma1pm1<%1%>(%1%)");
 }

 template <class T>
-inline typename tools::promote_args<T>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
    tgamma1pm1(T z)
 {
    return tgamma1pm1(z, policies::policy<>());
@@ -2029,7 +2351,7 @@ inline typename tools::promote_args<T>::type
 //
 // Full upper incomplete gamma:
 //
 template <class T1, class T2>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    tgamma(T1 a, T2 z)
 {
    //
@@ -2041,17 +2363,23 @@ inline tools::promote_args_t<T1, T2>
    return static_cast<result_type>(detail::tgamma(a, z, maybe_policy()));
 }
 template <class T1, class T2, class Policy>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    tgamma(T1 a, T2 z, const Policy& pol)
 {
    using result_type = tools::promote_args_t<T1, T2>;
-   return static_cast<result_type>(detail::tgamma(a, z, pol, std::false_type()));
+   return static_cast<result_type>(detail::tgamma(a, z, pol, boost::math::false_type()));
+}
+template <class T>
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
+   tgamma(T z)
+{
+   return tgamma(z, policies::policy<>());
 }
 //
 // Full lower incomplete gamma:
 //
 template <class T1, class T2, class Policy>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    tgamma_lower(T1 a, T2 z, const Policy&)
 {
    BOOST_FPU_EXCEPTION_GUARD
@@ -2073,7 +2401,7 @@ inline tools::promote_args_t<T1, T2>
       forwarding_policy(), static_cast<value_type*>(nullptr)), "tgamma_lower<%1%>(%1%, %1%)");
 }
 template <class T1, class T2>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    tgamma_lower(T1 a, T2 z)
 {
    return tgamma_lower(a, z, policies::policy<>());
@@ -2082,7 +2410,7 @@ inline tools::promote_args_t<T1, T2>
 //
 // Regularised upper incomplete gamma:
 //
 template <class T1, class T2, class Policy>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    gamma_q(T1 a, T2 z, const Policy& /* pol */)
 {
    BOOST_FPU_EXCEPTION_GUARD
@@ -2104,7 +2432,7 @@ inline tools::promote_args_t<T1, T2>
       forwarding_policy(), static_cast<value_type*>(nullptr)), "gamma_q<%1%>(%1%, %1%)");
 }
 template <class T1, class T2>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    gamma_q(T1 a, T2 z)
 {
    return gamma_q(a, z, policies::policy<>());
@@ -2113,7 +2441,7 @@ inline tools::promote_args_t<T1, T2>
 //
 // Regularised lower incomplete gamma:
 //
 template <class T1, class T2, class Policy>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    gamma_p(T1 a, T2 z, const Policy&)
 {
    BOOST_FPU_EXCEPTION_GUARD
@@ -2135,7 +2463,7 @@ inline tools::promote_args_t<T1, T2>
       forwarding_policy(), static_cast<value_type*>(nullptr)), "gamma_p<%1%>(%1%, %1%)");
 }
 template <class T1, class T2>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    gamma_p(T1 a, T2 z)
 {
    return gamma_p(a, z, policies::policy<>());
@@ -2143,7 +2471,7 @@ inline
tools::promote_args_t<T1, T2>

 // ratios of gamma functions:
 template <class T1, class T2, class Policy>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    tgamma_delta_ratio(T1 z, T2 delta, const Policy& /* pol */)
 {
    BOOST_FPU_EXCEPTION_GUARD
@@ -2159,13 +2487,13 @@ inline tools::promote_args_t<T1, T2>
    return policies::checked_narrowing_cast<result_type, forwarding_policy>(detail::tgamma_delta_ratio_imp(static_cast<value_type>(z), static_cast<value_type>(delta), forwarding_policy()), "boost::math::tgamma_delta_ratio<%1%>(%1%, %1%)");
 }
 template <class T1, class T2>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    tgamma_delta_ratio(T1 z, T2 delta)
 {
    return tgamma_delta_ratio(z, delta, policies::policy<>());
 }
 template <class T1, class T2, class Policy>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    tgamma_ratio(T1 a, T2 b, const Policy&)
 {
    typedef tools::promote_args_t<T1, T2> result_type;
@@ -2180,14 +2508,14 @@ inline tools::promote_args_t<T1, T2>
    return policies::checked_narrowing_cast<result_type, forwarding_policy>(detail::tgamma_ratio_imp(static_cast<value_type>(a), static_cast<value_type>(b), forwarding_policy()), "boost::math::tgamma_delta_ratio<%1%>(%1%, %1%)");
 }
 template <class T1, class T2>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    tgamma_ratio(T1 a, T2 b)
 {
    return tgamma_ratio(a, b, policies::policy<>());
 }

 template <class T1, class T2, class Policy>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    gamma_p_derivative(T1 a, T2 x, const Policy&)
 {
    BOOST_FPU_EXCEPTION_GUARD
@@ -2203,7 +2531,7 @@ inline tools::promote_args_t<T1, T2>
    return policies::checked_narrowing_cast<result_type, forwarding_policy>(detail::gamma_p_derivative_imp(static_cast<value_type>(a), static_cast<value_type>(x), forwarding_policy()), "boost::math::gamma_p_derivative<%1%>(%1%, %1%)");
 }
 template <class T1, class T2>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    gamma_p_derivative(T1 a, T2 x)
 {
    return gamma_p_derivative(a, x, policies::policy<>());
diff --git a/include/boost/math/special_functions/gegenbauer.hpp b/include/boost/math/special_functions/gegenbauer.hpp
index b7033cd14f..70324cf656 100644
--- a/include/boost/math/special_functions/gegenbauer.hpp
+++ b/include/boost/math/special_functions/gegenbauer.hpp
@@ -1,4 +1,5 @@
 // (C) Copyright Nick Thompson 2019.
+// (C) Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -6,21 +7,25 @@
 #ifndef BOOST_MATH_SPECIAL_GEGENBAUER_HPP
 #define BOOST_MATH_SPECIAL_GEGENBAUER_HPP

-#include
+#include
+#include
+#include
+
+#ifndef BOOST_MATH_NO_EXCEPTIONS
 #include
-#include
+#endif

 namespace boost { namespace math {

 template <typename Real>
-Real gegenbauer(unsigned n, Real lambda, Real x)
+BOOST_MATH_GPU_ENABLED Real gegenbauer(unsigned n, Real lambda, Real x)
 {
-    static_assert(!std::is_integral<Real>::value, "Gegenbauer polynomials required floating point arguments.");
+    static_assert(!boost::math::is_integral<Real>::value, "Gegenbauer polynomials require floating point arguments.");
     if (lambda <= -1/Real(2)) {
 #ifndef BOOST_MATH_NO_EXCEPTIONS
         throw std::domain_error("lambda > -1/2 is required.");
 #else
-        return std::numeric_limits<Real>::quiet_NaN();
+        return boost::math::numeric_limits<Real>::quiet_NaN();
 #endif
     }
     // The only reason to do this is because of some instability that could be present for x < 0 that is not present for x > 0.
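// [Editorial aside: a minimal sketch, illustration only, not part of the patch.]
// The loop that follows evaluates the Gegenbauer polynomial C_n^lambda(x) with
// the standard three-term recurrence
//    C_0 = 1,   C_1 = 2*lambda*x,
//    k*C_k = 2*(k - 1 + lambda)*x*C_{k-1} - (k - 2 + 2*lambda)*C_{k-2},
// which needs no std:: facilities and is therefore GPU-friendly. A standalone
// double-precision version of just the recurrence (function name hypothetical):
double gegenbauer_by_recurrence(unsigned n, double lambda, double x)
{
   if (n == 0) { return 1.0; }
   double ckm1 = 1.0;              // C_0
   double ck   = 2.0 * lambda * x; // C_1
   for (unsigned k = 2; k <= n; ++k)
   {
      // k*C_k = 2*(k - 1 + lambda)*x*C_{k-1} - (k - 2 + 2*lambda)*C_{k-2}
      const double ckp1 = (2.0 * (k - 1.0 + lambda) * x * ck
                           - (k - 2.0 + 2.0 * lambda) * ckm1) / k;
      ckm1 = ck;
      ck   = ckp1;
   }
   return ck;
}
// Quick check: n == 2 yields 2*lambda*(1 + lambda)*x*x - lambda, the textbook C_2.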
@@ -41,7 +46,7 @@ Real gegenbauer(unsigned n, Real lambda, Real x) Real yk = y1; Real k = 2; - Real k_max = n*(1+std::numeric_limits::epsilon()); + Real k_max = n*(1+boost::math::numeric_limits::epsilon()); Real gamma = 2*(lambda - 1); while(k < k_max) { @@ -55,7 +60,7 @@ Real gegenbauer(unsigned n, Real lambda, Real x) template -Real gegenbauer_derivative(unsigned n, Real lambda, Real x, unsigned k) +BOOST_MATH_GPU_ENABLED Real gegenbauer_derivative(unsigned n, Real lambda, Real x, unsigned k) { if (k > n) { return Real(0); @@ -70,7 +75,7 @@ Real gegenbauer_derivative(unsigned n, Real lambda, Real x, unsigned k) } template -Real gegenbauer_prime(unsigned n, Real lambda, Real x) { +BOOST_MATH_GPU_ENABLED Real gegenbauer_prime(unsigned n, Real lambda, Real x) { return gegenbauer_derivative(n, lambda, x, 1); } diff --git a/include/boost/math/special_functions/hankel.hpp b/include/boost/math/special_functions/hankel.hpp index 51b8390d99..730c7afa03 100644 --- a/include/boost/math/special_functions/hankel.hpp +++ b/include/boost/math/special_functions/hankel.hpp @@ -1,4 +1,5 @@ // Copyright John Maddock 2012. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt @@ -7,26 +8,31 @@ #ifndef BOOST_MATH_HANKEL_HPP #define BOOST_MATH_HANKEL_HPP +#include +#include #include #include +#include +#include +#include namespace boost{ namespace math{ namespace detail{ template -std::complex hankel_imp(T v, T x, const bessel_no_int_tag&, const Policy& pol, int sign) +BOOST_MATH_GPU_ENABLED boost::math::complex hankel_imp(T v, T x, const bessel_no_int_tag&, const Policy& pol, int sign) { BOOST_MATH_STD_USING - static const char* function = "boost::math::cyl_hankel_1<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::cyl_hankel_1<%1%>(%1%,%1%)"; if(x < 0) { bool isint_v = floor(v) == v; T j, y; bessel_jy(v, -x, &j, &y, need_j | need_y, pol); - std::complex cx(x), cv(v); - std::complex j_result, y_result; + boost::math::complex cx(x), cv(v); + boost::math::complex j_result, y_result; if(isint_v) { int s = (iround(v) & 1) ? 
-1 : 1; @@ -37,12 +43,12 @@ std::complex hankel_imp(T v, T x, const bessel_no_int_tag&, const Policy& pol { j_result = pow(cx, v) * pow(-cx, -v) * j; T p1 = pow(-x, v); - std::complex p2 = pow(cx, v); + boost::math::complex p2 = pow(cx, v); y_result = p1 * y / p2 + (p2 / p1 - p1 / p2) * j / tan(constants::pi() * v); } // multiply y_result by i: - y_result = std::complex(-sign * y_result.imag(), sign * y_result.real()); + y_result = boost::math::complex(-sign * y_result.imag(), sign * y_result.real()); return j_result + y_result; } @@ -51,25 +57,25 @@ std::complex hankel_imp(T v, T x, const bessel_no_int_tag&, const Policy& pol if(v == 0) { // J is 1, Y is -INF - return std::complex(1, sign * -policies::raise_overflow_error(function, nullptr, pol)); + return boost::math::complex(1, sign * -policies::raise_overflow_error(function, nullptr, pol)); } else { // At least one of J and Y is complex infinity: - return std::complex(policies::raise_overflow_error(function, nullptr, pol), sign * policies::raise_overflow_error(function, nullptr, pol)); + return boost::math::complex(policies::raise_overflow_error(function, nullptr, pol), sign * policies::raise_overflow_error(function, nullptr, pol)); } } T j, y; bessel_jy(v, x, &j, &y, need_j | need_y, pol); - return std::complex(j, sign * y); + return boost::math::complex(j, sign * y); } template -std::complex hankel_imp(int v, T x, const bessel_int_tag&, const Policy& pol, int sign); +BOOST_MATH_GPU_ENABLED boost::math::complex hankel_imp(int v, T x, const bessel_int_tag&, const Policy& pol, int sign); template -inline std::complex hankel_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& pol, int sign) +BOOST_MATH_GPU_ENABLED inline boost::math::complex hankel_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& pol, int sign) { BOOST_MATH_STD_USING // ADL of std names. 
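// [Editorial aside: a minimal sketch, illustration only, not part of the patch.]
// The sign argument threaded through hankel_imp records which Hankel function
// gets assembled from the Bessel pair that bessel_jy computes:
//    H1_v(x) = J_v(x) + i*Y_v(x)   (sign = +1, cyl_hankel_1)
//    H2_v(x) = J_v(x) - i*Y_v(x)   (sign = -1, cyl_hankel_2)
// Host-only sketch using the C++17 <cmath> special functions (note these are
// not shipped by every standard library; libc++, for example, lacks them):
#include <cmath>
#include <complex>

std::complex<double> hankel_from_bessel(double v, double x, int sign)
{
   // Valid for x > 0: J_v supplies the real part, +/-Y_v the imaginary part.
   return std::complex<double>(std::cyl_bessel_j(v, x), sign * std::cyl_neumann(v, x));
}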
   int ival = detail::iconv(v, pol);
@@ -81,57 +87,57 @@ inline std::complex<T> hankel_imp(T v, T x, const bessel_maybe_int_tag&, const P
 }

 template <class T, class Policy>
-inline std::complex<T> hankel_imp(int v, T x, const bessel_int_tag&, const Policy& pol, int sign)
+BOOST_MATH_GPU_ENABLED inline boost::math::complex<T> hankel_imp(int v, T x, const bessel_int_tag&, const Policy& pol, int sign)
 {
    BOOST_MATH_STD_USING
-   if((std::abs(v) < 200) && (x > 0))
-      return std::complex<T>(bessel_jn(v, x, pol), sign * bessel_yn(v, x, pol));
+   if((abs(v) < 200) && (x > 0))
+      return boost::math::complex<T>(bessel_jn(v, x, pol), sign * bessel_yn(v, x, pol));
    return hankel_imp(static_cast<T>(v), x, bessel_no_int_tag(), pol, sign);
 }

 template <class T, class Policy>
-inline std::complex<T> sph_hankel_imp(T v, T x, const Policy& pol, int sign)
+BOOST_MATH_GPU_ENABLED inline boost::math::complex<T> sph_hankel_imp(T v, T x, const Policy& pol, int sign)
 {
    BOOST_MATH_STD_USING
-   return constants::root_half_pi<T>() * hankel_imp(v + 0.5f, x, bessel_no_int_tag(), pol, sign) / sqrt(std::complex<T>(x));
+   return constants::root_half_pi<T>() * hankel_imp(v + 0.5f, x, bessel_no_int_tag(), pol, sign) / sqrt(boost::math::complex<T>(x));
 }

 } // namespace detail

 template <class T1, class T2, class Policy>
-inline std::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_1(T1 v, T2 x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline boost::math::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_1(T1 v, T2 x, const Policy& pol)
 {
    BOOST_FPU_EXCEPTION_GUARD
    typedef typename detail::bessel_traits<T1, T2, Policy>::result_type result_type;
    typedef typename detail::bessel_traits<T1, T2, Policy>::optimisation_tag tag_type;
    typedef typename policies::evaluation<result_type, Policy>::type value_type;
-   return policies::checked_narrowing_cast<std::complex<result_type>, Policy>(detail::hankel_imp(v, static_cast<value_type>(x), tag_type(), pol, 1), "boost::math::cyl_hankel_1<%1%>(%1%,%1%)");
+   return policies::checked_narrowing_cast<boost::math::complex<result_type>, Policy>(detail::hankel_imp(v, static_cast<value_type>(x), tag_type(), pol, 1), "boost::math::cyl_hankel_1<%1%>(%1%,%1%)");
 }

 template <class T1, class T2>
-inline std::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> cyl_hankel_1(T1 v, T2 x)
+BOOST_MATH_GPU_ENABLED inline boost::math::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> cyl_hankel_1(T1 v, T2 x)
 {
    return cyl_hankel_1(v, x, policies::policy<>());
 }

 template <class T1, class T2, class Policy>
-inline std::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_2(T1 v, T2 x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline boost::math::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_2(T1 v, T2 x, const Policy& pol)
 {
    BOOST_FPU_EXCEPTION_GUARD
    typedef typename detail::bessel_traits<T1, T2, Policy>::result_type result_type;
    typedef typename detail::bessel_traits<T1, T2, Policy>::optimisation_tag tag_type;
    typedef typename policies::evaluation<result_type, Policy>::type value_type;
-   return policies::checked_narrowing_cast<std::complex<result_type>, Policy>(detail::hankel_imp(v, static_cast<value_type>(x), tag_type(), pol, -1), "boost::math::cyl_hankel_1<%1%>(%1%,%1%)");
+   return policies::checked_narrowing_cast<boost::math::complex<result_type>, Policy>(detail::hankel_imp(v, static_cast<value_type>(x), tag_type(), pol, -1), "boost::math::cyl_hankel_2<%1%>(%1%,%1%)");
 }

 template <class T1, class T2>
-inline std::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> cyl_hankel_2(T1 v, T2 x)
+BOOST_MATH_GPU_ENABLED inline boost::math::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> cyl_hankel_2(T1 v, T2 x)
 {
    return cyl_hankel_2(v, x, policies::policy<>());
 }

 template <class T1, class T2, class Policy>
-inline std::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_1(T1 v, T2 x, const Policy&)
+BOOST_MATH_GPU_ENABLED inline boost::math::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_1(T1 v, T2 x, const Policy&)
 {
    BOOST_FPU_EXCEPTION_GUARD
    typedef typename detail::bessel_traits<T1, T2, Policy>::result_type result_type;
@@ -143,17 +149,17 @@ inline std::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type>
       policies::discrete_quantile<>,
       policies::assert_undefined<> >::type forwarding_policy;
-   return policies::checked_narrowing_cast<std::complex<result_type>,
Policy>(detail::sph_hankel_imp(static_cast<value_type>(v), static_cast<value_type>(x), forwarding_policy(), 1), "boost::math::sph_hankel_1<%1%>(%1%,%1%)");
+   return policies::checked_narrowing_cast<boost::math::complex<result_type>, Policy>(detail::sph_hankel_imp(static_cast<value_type>(v), static_cast<value_type>(x), forwarding_policy(), 1), "boost::math::sph_hankel_1<%1%>(%1%,%1%)");
 }

 template <class T1, class T2>
-inline std::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> sph_hankel_1(T1 v, T2 x)
+BOOST_MATH_GPU_ENABLED inline boost::math::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> sph_hankel_1(T1 v, T2 x)
 {
    return sph_hankel_1(v, x, policies::policy<>());
 }

 template <class T1, class T2, class Policy>
-inline std::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_2(T1 v, T2 x, const Policy&)
+BOOST_MATH_GPU_ENABLED inline boost::math::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_2(T1 v, T2 x, const Policy&)
 {
    BOOST_FPU_EXCEPTION_GUARD
    typedef typename detail::bessel_traits<T1, T2, Policy>::result_type result_type;
@@ -165,11 +171,11 @@ inline std::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type>
       policies::discrete_quantile<>,
       policies::assert_undefined<> >::type forwarding_policy;
-   return policies::checked_narrowing_cast<std::complex<result_type>, Policy>(detail::sph_hankel_imp(static_cast<value_type>(v), static_cast<value_type>(x), forwarding_policy(), -1), "boost::math::sph_hankel_1<%1%>(%1%,%1%)");
+   return policies::checked_narrowing_cast<boost::math::complex<result_type>, Policy>(detail::sph_hankel_imp(static_cast<value_type>(v), static_cast<value_type>(x), forwarding_policy(), -1), "boost::math::sph_hankel_2<%1%>(%1%,%1%)");
 }

 template <class T1, class T2>
-inline std::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> sph_hankel_2(T1 v, T2 x)
+BOOST_MATH_GPU_ENABLED inline boost::math::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> sph_hankel_2(T1 v, T2 x)
 {
    return sph_hankel_2(v, x, policies::policy<>());
 }
diff --git a/include/boost/math/special_functions/hermite.hpp b/include/boost/math/special_functions/hermite.hpp
index 81ccb2ac66..3d77fc03e3 100644
--- a/include/boost/math/special_functions/hermite.hpp
+++ b/include/boost/math/special_functions/hermite.hpp
@@ -1,5 +1,6 @@
 // (C) Copyright John Maddock 2006.
+// (C) Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -11,8 +12,9 @@ #pragma once #endif -#include #include +#include +#include #include namespace boost{ @@ -20,7 +22,7 @@ namespace math{ // Recurrence relation for Hermite polynomials: template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type hermite_next(unsigned n, T1 x, T2 Hn, T3 Hnm1) { using promoted_type = tools::promote_args_t; @@ -31,7 +33,7 @@ namespace detail{ // Implement Hermite polynomials via recurrence: template -T hermite_imp(unsigned n, T x) +BOOST_MATH_GPU_ENABLED T hermite_imp(unsigned n, T x) { T p0 = 1; T p1 = 2 * x; @@ -43,7 +45,7 @@ T hermite_imp(unsigned n, T x) while(c < n) { - std::swap(p0, p1); + BOOST_MATH_GPU_SAFE_SWAP(p0, p1); p1 = static_cast(hermite_next(c, x, p0, p1)); ++c; } @@ -53,7 +55,7 @@ T hermite_imp(unsigned n, T x) } // namespace detail template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type hermite(unsigned n, T x, const Policy&) { typedef typename tools::promote_args::type result_type; @@ -62,7 +64,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type hermite(unsigned n, T x) { return boost::math::hermite(n, x, policies::policy<>()); diff --git a/include/boost/math/special_functions/heuman_lambda.hpp b/include/boost/math/special_functions/heuman_lambda.hpp index 0fbf4a9803..05002725f2 100644 --- a/include/boost/math/special_functions/heuman_lambda.hpp +++ b/include/boost/math/special_functions/heuman_lambda.hpp @@ -1,4 +1,5 @@ // Copyright (c) 2015 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,6 +11,9 @@ #pragma once #endif +#include +#include +#include #include #include #include @@ -26,13 +30,13 @@ namespace detail{ // Elliptic integral - Jacobi Zeta template -T heuman_lambda_imp(T phi, T k, const Policy& pol) +BOOST_MATH_GPU_ENABLED T heuman_lambda_imp(T phi, T k, const Policy& pol) { BOOST_MATH_STD_USING using namespace boost::math::tools; using namespace boost::math::constants; - const char* function = "boost::math::heuman_lambda<%1%>(%1%, %1%)"; + constexpr auto function = "boost::math::heuman_lambda<%1%>(%1%, %1%)"; if(fabs(k) > 1) return policies::raise_domain_error(function, "We require |k| <= 1 but got k = %1%", k, pol); @@ -51,10 +55,10 @@ T heuman_lambda_imp(T phi, T k, const Policy& pol) } else { - typedef std::integral_constant::value&& std::numeric_limits::digits && (std::numeric_limits::digits <= 54) ? 0 : - std::is_floating_point::value && std::numeric_limits::digits && (std::numeric_limits::digits <= 64) ? 1 : 2 - > precision_tag_type; + typedef boost::math::integral_constant::value && boost::math::numeric_limits::digits && (boost::math::numeric_limits::digits <= 54) ? 0 : + boost::math::is_floating_point::value && boost::math::numeric_limits::digits && (boost::math::numeric_limits::digits <= 64) ? 
1 : 2 + > precision_tag_type; T rkp = sqrt(kp); T ratio; @@ -63,7 +67,9 @@ T heuman_lambda_imp(T phi, T k, const Policy& pol) return policies::raise_domain_error(function, "When 1-k^2 == 1 then phi must be < Pi/2, but got phi = %1%", phi, pol); } else + { ratio = ellint_f_imp(phi, rkp, pol, k2) / ellint_k_imp(rkp, pol, k2); + } result = ratio + ellint_k_imp(k, pol, precision_tag_type()) * jacobi_zeta_imp(phi, rkp, pol, k2) / constants::half_pi(); } return result; @@ -72,7 +78,7 @@ T heuman_lambda_imp(T phi, T k, const Policy& pol) } // detail template -inline typename tools::promote_args::type heuman_lambda(T1 k, T2 phi, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type heuman_lambda(T1 k, T2 phi, const Policy& pol) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -80,7 +86,7 @@ inline typename tools::promote_args::type heuman_lambda(T1 k, T2 phi, co } template -inline typename tools::promote_args::type heuman_lambda(T1 k, T2 phi) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type heuman_lambda(T1 k, T2 phi) { return boost::math::heuman_lambda(k, phi, policies::policy<>()); } diff --git a/include/boost/math/special_functions/hypot.hpp b/include/boost/math/special_functions/hypot.hpp index c56c751102..f38e37e872 100644 --- a/include/boost/math/special_functions/hypot.hpp +++ b/include/boost/math/special_functions/hypot.hpp @@ -12,20 +12,20 @@ #include #include +#include +#include #include #include -#include // for swap -#include namespace boost{ namespace math{ namespace detail{ template -T hypot_imp(T x, T y, const Policy& pol) +BOOST_MATH_GPU_ENABLED T hypot_imp(T x, T y, const Policy& pol) { // // Normalize x and y, so that both are positive and x >= y: // - using std::fabs; using std::sqrt; // ADL of std names + BOOST_MATH_STD_USING x = fabs(x); y = fabs(y); @@ -35,16 +35,16 @@ T hypot_imp(T x, T y, const Policy& pol) #pragma warning(disable: 4127) #endif // special case, see C99 Annex F: - if(std::numeric_limits::has_infinity - && ((x == std::numeric_limits::infinity()) - || (y == std::numeric_limits::infinity()))) + if(boost::math::numeric_limits::has_infinity + && ((x == boost::math::numeric_limits::infinity()) + || (y == boost::math::numeric_limits::infinity()))) return policies::raise_overflow_error("boost::math::hypot<%1%>(%1%,%1%)", nullptr, pol); #ifdef _MSC_VER #pragma warning(pop) #endif if(y > x) - (std::swap)(x, y); + BOOST_MATH_GPU_SAFE_SWAP(x, y); if(x * tools::epsilon() >= y) return x; @@ -56,7 +56,7 @@ T hypot_imp(T x, T y, const Policy& pol) } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type hypot(T1 x, T2 y) { typedef typename tools::promote_args::type result_type; @@ -65,7 +65,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type hypot(T1 x, T2 y, const Policy& pol) { typedef typename tools::promote_args::type result_type; diff --git a/include/boost/math/special_functions/jacobi_zeta.hpp b/include/boost/math/special_functions/jacobi_zeta.hpp index c4ba7d23d2..8b6f80912d 100644 --- a/include/boost/math/special_functions/jacobi_zeta.hpp +++ b/include/boost/math/special_functions/jacobi_zeta.hpp @@ -11,6 +11,8 @@ #pragma once #endif +#include +#include #include #include #include @@ -27,7 +29,7 @@ namespace detail{ // Elliptic integral - Jacobi Zeta template -T 
jacobi_zeta_imp(T phi, T k, const Policy& pol, T kp) +BOOST_MATH_GPU_ENABLED T jacobi_zeta_imp(T phi, T k, const Policy& pol, T kp) { BOOST_MATH_STD_USING using namespace boost::math::tools; @@ -55,14 +57,14 @@ T jacobi_zeta_imp(T phi, T k, const Policy& pol, T kp) return invert ? T(-result) : result; } template -inline T jacobi_zeta_imp(T phi, T k, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T jacobi_zeta_imp(T phi, T k, const Policy& pol) { return jacobi_zeta_imp(phi, k, pol, T(1 - k * k)); } } // detail template -inline typename tools::promote_args::type jacobi_zeta(T1 k, T2 phi, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type jacobi_zeta(T1 k, T2 phi, const Policy& pol) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -70,7 +72,7 @@ inline typename tools::promote_args::type jacobi_zeta(T1 k, T2 phi, cons } template -inline typename tools::promote_args::type jacobi_zeta(T1 k, T2 phi) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type jacobi_zeta(T1 k, T2 phi) { return boost::math::jacobi_zeta(k, phi, policies::policy<>()); } diff --git a/include/boost/math/special_functions/lanczos.hpp b/include/boost/math/special_functions/lanczos.hpp index d75a968cdb..0ec24bddbf 100644 --- a/include/boost/math/special_functions/lanczos.hpp +++ b/include/boost/math/special_functions/lanczos.hpp @@ -11,12 +11,16 @@ #endif #include -#include #include +#include +#include +#include #include -#include -#include + +#ifndef BOOST_MATH_HAS_NVRTC +#include #include +#endif #if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128) // @@ -48,7 +52,7 @@ namespace boost{ namespace math{ namespace lanczos{ // Default version assumes all g() values are the same. 
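// [Editorial aside: a minimal sketch, illustration only, not part of the patch.]
// Each lanczos* struct below packages a rational-polynomial fit of the Lanczos
// partial sum S_g(z) for one fixed shift g (reported by the struct's g()
// member); the sqrt(2*pi) factor is folded into the coefficients. For modest
// z > 0, tgamma is then recovered essentially as
//    tgamma(z) ~ S_g(z) * pow(z + g - 0.5, z - 0.5) * exp(-(z + g - 0.5)),
// see gamma_imp in gamma.hpp for the overflow-hardened version. Sketch:
#include <cmath>
#include <boost/math/special_functions/lanczos.hpp>

double tgamma_via_lanczos(double z) // assumes modest z > 0, no overflow handling
{
   using L = boost::math::lanczos::lanczos13m53; // the double-precision fit
   const double zgh = z + L::g() - 0.5;
   return L::lanczos_sum(z) * std::pow(zgh, z - 0.5) * std::exp(-zgh);
}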
// template -inline double lanczos_g_near_1_and_2(const L&) +BOOST_MATH_GPU_ENABLED inline double lanczos_g_near_1_and_2(const L&) { return L::g(); } @@ -59,17 +63,17 @@ inline double lanczos_g_near_1_and_2(const L&) // Max experimental error (with arbitrary precision arithmetic) 9.516e-12 // Generated with compiler: Microsoft Visual C++ version 8.0 on Win32 at Mar 23 2006 // -struct lanczos6 : public std::integral_constant +struct lanczos6 : public boost::math::integral_constant { // // Produces slightly better than float precision when evaluated at // double precision: // template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[6] = { + BOOST_MATH_STATIC const T num[6] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 8706.349592549009182288174442774377925882)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 8523.650341121874633477483696775067709735)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 3338.029219476423550899999750161289306564)), @@ -77,23 +81,23 @@ struct lanczos6 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 63.99951844938187085666201263218840287667)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 2.506628274631006311133031631822390264407)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint16_t) denom[6] = { - static_cast(0u), - static_cast(24u), - static_cast(50u), - static_cast(35u), - static_cast(10u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint16_t) denom[6] = { + static_cast(0u), + static_cast(24u), + static_cast(50u), + static_cast(35u), + static_cast(10u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[6] = { + BOOST_MATH_STATIC const T num[6] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 32.81244541029783471623665933780748627823)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 32.12388941444332003446077108933558534361)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 12.58034729455216106950851080138931470954)), @@ -101,13 +105,13 @@ struct lanczos6 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 0.2412010548258800231126240760264822486599)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 0.009446967704539249494420221613134244048319)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint16_t) denom[6] = { - static_cast(0u), - static_cast(24u), - static_cast(50u), - static_cast(35u), - static_cast(10u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint16_t) denom[6] = { + static_cast(0u), + static_cast(24u), + static_cast(50u), + static_cast(35u), + static_cast(10u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); @@ -115,10 +119,10 @@ struct lanczos6 : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[5] = { + BOOST_MATH_STATIC const T d[5] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 2.044879010930422922760429926121241330235)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, -2.751366405578505366591317846728753993668)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 1.02282965224225004296750609604264824677)), @@ -135,10 +139,10 @@ struct lanczos6 : public 
std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[5] = { + BOOST_MATH_STATIC const T d[5] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 5.748142489536043490764289256167080091892)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, -7.734074268282457156081021756682138251825)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 2.875167944990511006997713242805893543947)), @@ -155,7 +159,7 @@ struct lanczos6 : public std::integral_constant return result; } - static double g(){ return 5.581000000000000405009359383257105946541; } + BOOST_MATH_GPU_ENABLED static double g(){ return 5.581000000000000405009359383257105946541; } }; // @@ -163,17 +167,17 @@ struct lanczos6 : public std::integral_constant // Max experimental error (with arbitrary precision arithmetic) 2.16676e-19 // Generated with compiler: Microsoft Visual C++ version 8.0 on Win32 at Mar 23 2006 // -struct lanczos11 : public std::integral_constant +struct lanczos11 : public boost::math::integral_constant { // // Produces slightly better than double precision when evaluated at // extended-double precision: // template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[11] = { + BOOST_MATH_STATIC const T num[11] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 38474670393.31776828316099004518914832218)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 36857665043.51950660081971227404959150474)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 15889202453.72942008945006665994637853242)), @@ -186,28 +190,28 @@ struct lanczos11 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 261.6140441641668190791708576058805625502)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 2.506628274631000502415573855452633787834)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint32_t) denom[11] = { - static_cast(0u), - static_cast(362880u), - static_cast(1026576u), - static_cast(1172700u), - static_cast(723680u), - static_cast(269325u), - static_cast(63273u), - static_cast(9450u), - static_cast(870u), - static_cast(45u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint32_t) denom[11] = { + static_cast(0u), + static_cast(362880u), + static_cast(1026576u), + static_cast(1172700u), + static_cast(723680u), + static_cast(269325u), + static_cast(63273u), + static_cast(9450u), + static_cast(870u), + static_cast(45u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[11] = { + BOOST_MATH_STATIC const T num[11] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 709811.662581657956893540610814842699825)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 679979.847415722640161734319823103390728)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 293136.785721159725251629480984140341656)), @@ -220,18 +224,18 @@ struct lanczos11 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 0.004826466289237661857584712046231435101741)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 0.4624429436045378766270459638520555557321e-4)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint32_t) denom[11] = { - static_cast(0u), - static_cast(362880u), - static_cast(1026576u), - static_cast(1172700u), - 
static_cast(723680u), - static_cast(269325u), - static_cast(63273u), - static_cast(9450u), - static_cast(870u), - static_cast(45u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint32_t) denom[11] = { + static_cast(0u), + static_cast(362880u), + static_cast(1026576u), + static_cast(1172700u), + static_cast(723680u), + static_cast(269325u), + static_cast(63273u), + static_cast(9450u), + static_cast(870u), + static_cast(45u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); @@ -239,10 +243,10 @@ struct lanczos11 : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[10] = { + BOOST_MATH_STATIC const T d[10] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 4.005853070677940377969080796551266387954)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, -13.17044315127646469834125159673527183164)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 17.19146865350790353683895137079288129318)), @@ -264,10 +268,10 @@ struct lanczos11 : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[10] = { + BOOST_MATH_STATIC const T d[10] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 19.05889633808148715159575716844556056056)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, -62.66183664701721716960978577959655644762)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 81.7929198065004751699057192860287512027)), @@ -289,7 +293,7 @@ struct lanczos11 : public std::integral_constant return result; } - static double g(){ return 10.90051099999999983936049829935654997826; } + BOOST_MATH_GPU_ENABLED static double g(){ return 10.90051099999999983936049829935654997826; } }; // @@ -297,17 +301,17 @@ struct lanczos11 : public std::integral_constant // Max experimental error (with arbitrary precision arithmetic) 9.2213e-23 // Generated with compiler: Microsoft Visual C++ version 8.0 on Win32 at Mar 23 2006 // -struct lanczos13 : public std::integral_constant +struct lanczos13 : public boost::math::integral_constant { // // Produces slightly better than extended-double precision when evaluated at // higher precision: // template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[13] = { + BOOST_MATH_STATIC const T num[13] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 44012138428004.60895436261759919070125699)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 41590453358593.20051581730723108131357995)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 18013842787117.99677796276038389462742949)), @@ -322,30 +326,30 @@ struct lanczos13 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 381.8801248632926870394389468349331394196)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 2.506628274631000502415763426076722427007)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint32_t) denom[13] = { - static_cast(0u), - static_cast(39916800u), - static_cast(120543840u), - static_cast(150917976u), - static_cast(105258076u), - static_cast(45995730u), - static_cast(13339535u), - static_cast(2637558u), - static_cast(357423u), - static_cast(32670u), - static_cast(1925u), - static_cast(66u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint32_t) denom[13] = { + 
static_cast(0u), + static_cast(39916800u), + static_cast(120543840u), + static_cast(150917976u), + static_cast(105258076u), + static_cast(45995730u), + static_cast(13339535u), + static_cast(2637558u), + static_cast(357423u), + static_cast(32670u), + static_cast(1925u), + static_cast(66u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[13] = { + BOOST_MATH_STATIC const T num[13] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 86091529.53418537217994842267760536134841)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 81354505.17858011242874285785316135398567)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 35236626.38815461910817650960734605416521)), @@ -360,20 +364,20 @@ struct lanczos13 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 0.0007469903808915448316510079585999893674101)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 0.4903180573459871862552197089738373164184e-5)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint32_t) denom[13] = { - static_cast(0u), - static_cast(39916800u), - static_cast(120543840u), - static_cast(150917976u), - static_cast(105258076u), - static_cast(45995730u), - static_cast(13339535u), - static_cast(2637558u), - static_cast(357423u), - static_cast(32670u), - static_cast(1925u), - static_cast(66u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint32_t) denom[13] = { + static_cast(0u), + static_cast(39916800u), + static_cast(120543840u), + static_cast(150917976u), + static_cast(105258076u), + static_cast(45995730u), + static_cast(13339535u), + static_cast(2637558u), + static_cast(357423u), + static_cast(32670u), + static_cast(1925u), + static_cast(66u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); @@ -381,10 +385,10 @@ struct lanczos13 : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[12] = { + BOOST_MATH_STATIC const T d[12] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 4.832115561461656947793029596285626840312)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, -19.86441536140337740383120735104359034688)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 33.9927422807443239927197864963170585331)), @@ -408,10 +412,10 @@ struct lanczos13 : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[12] = { + BOOST_MATH_STATIC const T d[12] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 26.96979819614830698367887026728396466395)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, -110.8705424709385114023884328797900204863)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 189.7258846119231466417015694690434770085)), @@ -435,7 +439,7 @@ struct lanczos13 : public std::integral_constant return result; } - static double g(){ return 13.1445650000000000545696821063756942749; } + BOOST_MATH_GPU_ENABLED static double g(){ return 13.1445650000000000545696821063756942749; } }; // @@ -443,16 +447,16 @@ struct lanczos13 : public std::integral_constant // Max experimental error (with arbitrary precision arithmetic) 8.111667e-8 // Generated with compiler: Microsoft Visual C++ version 8.0 on Win32 
at Mar 23 2006 // -struct lanczos6m24 : public std::integral_constant +struct lanczos6m24 : public boost::math::integral_constant { // // Use for float precision, when evaluated as a float: // template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[6] = { + BOOST_MATH_STATIC const T num[6] = { static_cast(58.52061591769095910314047740215847630266L), static_cast(182.5248962595894264831189414768236280862L), static_cast(211.0971093028510041839168287718170827259L), @@ -460,23 +464,23 @@ struct lanczos6m24 : public std::integral_constant static_cast(27.5192015197455403062503721613097825345L), static_cast(2.50662858515256974113978724717473206342L) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint16_t) denom[6] = { - static_cast(0u), - static_cast(24u), - static_cast(50u), - static_cast(35u), - static_cast(10u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint16_t) denom[6] = { + static_cast(0u), + static_cast(24u), + static_cast(50u), + static_cast(35u), + static_cast(10u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[6] = { + BOOST_MATH_STATIC const T num[6] = { static_cast(14.0261432874996476619570577285003839357L), static_cast(43.74732405540314316089531289293124360129L), static_cast(50.59547402616588964511581430025589038612L), @@ -484,13 +488,13 @@ struct lanczos6m24 : public std::integral_constant static_cast(6.595765571169314946316366571954421695196L), static_cast(0.6007854010515290065101128585795542383721L) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint16_t) denom[6] = { - static_cast(0u), - static_cast(24u), - static_cast(50u), - static_cast(35u), - static_cast(10u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint16_t) denom[6] = { + static_cast(0u), + static_cast(24u), + static_cast(50u), + static_cast(35u), + static_cast(10u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); @@ -498,10 +502,10 @@ struct lanczos6m24 : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[5] = { + BOOST_MATH_STATIC const T d[5] = { static_cast(0.4922488055204602807654354732674868442106L), static_cast(0.004954497451132152436631238060933905650346L), static_cast(-0.003374784572167105840686977985330859371848L), @@ -518,10 +522,10 @@ struct lanczos6m24 : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[5] = { + BOOST_MATH_STATIC const T d[5] = { static_cast(0.6534966888520080645505805298901130485464L), static_cast(0.006577461728560758362509168026049182707101L), static_cast(-0.004480276069269967207178373559014835978161L), @@ -538,7 +542,7 @@ struct lanczos6m24 : public std::integral_constant return result; } - static double g(){ return 1.428456135094165802001953125; } + BOOST_MATH_GPU_ENABLED static double g(){ return 1.428456135094165802001953125; } }; // @@ -546,16 +550,16 @@ struct lanczos6m24 : public std::integral_constant // Max experimental error (with arbitrary precision 
arithmetic) 1.196214e-17 // Generated with compiler: Microsoft Visual C++ version 8.0 on Win32 at Mar 23 2006 // -struct lanczos13m53 : public std::integral_constant +struct lanczos13m53 : public boost::math::integral_constant { // // Use for double precision, when evaluated as a double: // template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[13] = { + BOOST_MATH_STATIC const T num[13] = { static_cast(23531376880.41075968857200767445163675473L), static_cast(42919803642.64909876895789904700198885093L), static_cast(35711959237.35566804944018545154716670596L), @@ -570,30 +574,30 @@ struct lanczos13m53 : public std::integral_constant static_cast(210.8242777515793458725097339207133627117L), static_cast(2.506628274631000270164908177133837338626L) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint32_t) denom[13] = { - static_cast(0u), - static_cast(39916800u), - static_cast(120543840u), - static_cast(150917976u), - static_cast(105258076u), - static_cast(45995730u), - static_cast(13339535u), - static_cast(2637558u), - static_cast(357423u), - static_cast(32670u), - static_cast(1925u), - static_cast(66u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint32_t) denom[13] = { + static_cast(0u), + static_cast(39916800u), + static_cast(120543840u), + static_cast(150917976u), + static_cast(105258076u), + static_cast(45995730u), + static_cast(13339535u), + static_cast(2637558u), + static_cast(357423u), + static_cast(32670u), + static_cast(1925u), + static_cast(66u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[13] = { + BOOST_MATH_STATIC const T num[13] = { static_cast(56906521.91347156388090791033559122686859L), static_cast(103794043.1163445451906271053616070238554L), static_cast(86363131.28813859145546927288977868422342L), @@ -608,20 +612,20 @@ struct lanczos13m53 : public std::integral_constant static_cast(0.5098416655656676188125178644804694509993L), static_cast(0.006061842346248906525783753964555936883222L) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint32_t) denom[13] = { - static_cast(0u), - static_cast(39916800u), - static_cast(120543840u), - static_cast(150917976u), - static_cast(105258076u), - static_cast(45995730u), - static_cast(13339535u), - static_cast(2637558u), - static_cast(357423u), - static_cast(32670u), - static_cast(1925u), - static_cast(66u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint32_t) denom[13] = { + static_cast(0u), + static_cast(39916800u), + static_cast(120543840u), + static_cast(150917976u), + static_cast(105258076u), + static_cast(45995730u), + static_cast(13339535u), + static_cast(2637558u), + static_cast(357423u), + static_cast(32670u), + static_cast(1925u), + static_cast(66u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); @@ -629,10 +633,10 @@ struct lanczos13m53 : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[12] = { + BOOST_MATH_STATIC const T d[12] = { static_cast(2.208709979316623790862569924861841433016L), static_cast(-3.327150580651624233553677113928873034916L), 
static_cast(1.483082862367253753040442933770164111678L), @@ -656,10 +660,10 @@ struct lanczos13m53 : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[12] = { + BOOST_MATH_STATIC const T d[12] = { static_cast(6.565936202082889535528455955485877361223L), static_cast(-9.8907772644920670589288081640128194231L), static_cast(4.408830289125943377923077727900630927902L), @@ -683,7 +687,7 @@ struct lanczos13m53 : public std::integral_constant return result; } - static double g(){ return 6.024680040776729583740234375; } + BOOST_MATH_GPU_ENABLED static double g(){ return 6.024680040776729583740234375; } }; // @@ -691,16 +695,16 @@ struct lanczos13m53 : public std::integral_constant // Max experimental error (with arbitrary precision arithmetic) 2.7699e-26 // Generated with compiler: Microsoft Visual C++ version 8.0 on Win32 at Mar 23 2006 // -struct lanczos17m64 : public std::integral_constant +struct lanczos17m64 : public boost::math::integral_constant { // // Use for extended-double precision, when evaluated as an extended-double: // template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[17] = { + BOOST_MATH_STATIC const T num[17] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 553681095419291969.2230556393350368550504)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 731918863887667017.2511276782146694632234)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 453393234285807339.4627124634539085143364)), @@ -719,7 +723,7 @@ struct lanczos17m64 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 488.0063567520005730476791712814838113252)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.50662827463100050241576877135758834683)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint64_t) denom[17] = { + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint64_t) denom[17] = { BOOST_MATH_INT_VALUE_SUFFIX(0, uLL), BOOST_MATH_INT_VALUE_SUFFIX(1307674368000, uLL), BOOST_MATH_INT_VALUE_SUFFIX(4339163001600, uLL), @@ -743,10 +747,10 @@ struct lanczos17m64 : public std::integral_constant } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[17] = { + BOOST_MATH_STATIC const T num[17] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2715894658327.717377557655133124376674911)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3590179526097.912105038525528721129550434)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2223966599737.814969312127353235818710172)), @@ -765,7 +769,7 @@ struct lanczos17m64 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.002393749522058449186690627996063983095463)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.1229541408909435212800785616808830746135e-4)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint64_t) denom[17] = { + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint64_t) denom[17] = { BOOST_MATH_INT_VALUE_SUFFIX(0, uLL), BOOST_MATH_INT_VALUE_SUFFIX(1307674368000, uLL), BOOST_MATH_INT_VALUE_SUFFIX(4339163001600, uLL), @@ -790,10 +794,10 @@ struct lanczos17m64 : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[16] = { + 
BOOST_MATH_STATIC const T d[16] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.493645054286536365763334986866616581265)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -16.95716370392468543800733966378143997694)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 26.19196892983737527836811770970479846644)), @@ -821,10 +825,10 @@ struct lanczos17m64 : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[16] = { + BOOST_MATH_STATIC const T d[16] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 23.56409085052261327114594781581930373708)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -88.92116338946308797946237246006238652361)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 137.3472822086847596961177383569603988797)), @@ -852,7 +856,7 @@ struct lanczos17m64 : public std::integral_constant return result; } - static double g(){ return 12.2252227365970611572265625; } + BOOST_MATH_GPU_ENABLED static double g(){ return 12.2252227365970611572265625; } }; // @@ -860,16 +864,16 @@ struct lanczos17m64 : public std::integral_constant // Max experimental error (with arbitrary precision arithmetic) 1.0541e-38 // Generated with compiler: Microsoft Visual C++ version 8.0 on Win32 at Mar 23 2006 // -struct lanczos24m113 : public std::integral_constant +struct lanczos24m113 : public boost::math::integral_constant { // // Use for long-double precision, when evaluated as an long-double: // template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[24] = { + BOOST_MATH_STATIC const T num[24] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 2029889364934367661624137213253.22102954656825019111612712252027267955023987678816620961507)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 2338599599286656537526273232565.2727349714338768161421882478417543004440597874814359063158)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 1288527989493833400335117708406.3953711906175960449186720680201425446299360322830739180195)), @@ -895,7 +899,7 @@ struct lanczos24m113 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 1151.61895453463992438325318456328526085882924197763140514450975619271382783957699017875304)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 2.50662827463100050241576528481104515966515623051532908941425544355490413900497467936202516)) }; - static const T denom[24] = { + BOOST_MATH_STATIC const T denom[24] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.0)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.112400072777760768e22)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.414847677933545472e22)), @@ -926,10 +930,10 @@ struct lanczos24m113 : public std::integral_constant } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[24] = { + BOOST_MATH_STATIC const T num[24] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 3035162425359883494754.02878223286972654682199012688209026810841953293372712802258398358538)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 3496756894406430103600.16057175075063458536101374170860226963245118484234495645518505519827)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 1926652656689320888654.01954015145958293168365236755537645929361841917596501251362171653478)), @@ -955,7 +959,7 @@ struct lanczos24m113 : public std::integral_constant 
static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.172194142179211139195966608011235161516824700287310869949928393345257114743230967204370963e-5)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.374799931707148855771381263542708435935402853962736029347951399323367765509988401336565436e-8)) }; - static const T denom[24] = { + BOOST_MATH_STATIC const T denom[24] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.0)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.112400072777760768e22)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.414847677933545472e22)), @@ -987,10 +991,10 @@ struct lanczos24m113 : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[23] = { + BOOST_MATH_STATIC const T d[23] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 7.4734083002469026177867421609938203388868806387315406134072298925733950040583068760685908)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, -50.4225805042247530267317342133388132970816607563062253708655085754357843064134941138154171)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 152.288200621747008570784082624444625293884063492396162110698238568311211546361189979357019)), @@ -1025,10 +1029,10 @@ struct lanczos24m113 : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[23] = { + BOOST_MATH_STATIC const T d[23] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 61.4165001061101455341808888883960361969557848005400286332291451422461117307237198559485365)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, -414.372973678657049667308134761613915623353625332248315105320470271523320700386200587519147)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 1251.50505818554680171298972755376376836161706773644771875668053742215217922228357204561873)), @@ -1063,7 +1067,7 @@ struct lanczos24m113 : public std::integral_constant return result; } - static double g(){ return 20.3209821879863739013671875; } + BOOST_MATH_GPU_ENABLED static double g(){ return 20.3209821879863739013671875; } }; // @@ -1072,13 +1076,13 @@ struct lanczos24m113 : public std::integral_constant // Generated with compiler: Microsoft Visual C++ version 14.2 on Win32 at May 23 2021 // Type precision was 134 bits or 42 max_digits10 // -struct lanczos27MP : public std::integral_constant +struct lanczos27MP : public boost::math::integral_constant { template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[27] = { + BOOST_MATH_STATIC const T num[27] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 2.532923291341302819860952064783714673718970e+36)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 2.715272050979243637524956158081893927075092e+36)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 1.399396313336459710065708403038293278484916e+36)), @@ -1107,7 +1111,7 @@ struct lanczos27MP : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 1.580741273679785112052701460119954412080073e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 2.506628274631000502415765284811045253005320e+00)) }; - static const T denom[27] = { + BOOST_MATH_STATIC const T denom[27] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 0.000000000000000000000000000000000000000000e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 1.551121004333098598400000000000000000000000e+25)), 
static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 5.919012881170120359936000000000000000000000e+25)), @@ -1141,10 +1145,10 @@ struct lanczos27MP : public std::integral_constant } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[27] = { + BOOST_MATH_STATIC const T num[27] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 4.630539114451826442425094380936505531231478e+25)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 4.963898228350662244301785145431331232866294e+25)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 2.558292778812387748738731408569861630189290e+25)), @@ -1173,7 +1177,7 @@ struct lanczos27MP : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 2.889816806780013044430000551700375309307825e-08)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 4.582468135039046226997146555551548992616343e-11)) }; - static const T denom[27] = { + BOOST_MATH_STATIC const T denom[27] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 0.000000000000000000000000000000000000000000e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 1.551121004333098598400000000000000000000000e+25)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 5.919012881170120359936000000000000000000000e+25)), @@ -1208,10 +1212,10 @@ struct lanczos27MP : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[34] = { + BOOST_MATH_STATIC const T d[34] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 6.264579889722939745225908247624593169040293e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, -3.470545597111704235784909052092266897169254e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 8.398164226943527197542310295220360303173237e+01)), @@ -1257,10 +1261,10 @@ struct lanczos27MP : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[34] = { + BOOST_MATH_STATIC const T d[34] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 4.391991857844535020743473289228849738381662e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, -2.433141291692735004291785549611375831426138e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 5.887812040849956173864447000497922705559488e+02)), @@ -1306,10 +1310,10 @@ struct lanczos27MP : public std::integral_constant return result; } - static double g() { return 2.472513680905104038743047567550092935562134e+01; } + BOOST_MATH_GPU_ENABLED static double g() { return 2.472513680905104038743047567550092935562134e+01; } }; -inline double lanczos_g_near_1_and_2(const lanczos27MP&) +BOOST_MATH_GPU_ENABLED inline double lanczos_g_near_1_and_2(const lanczos27MP&) { return 17.03623256087303; } @@ -1320,13 +1324,13 @@ inline double lanczos_g_near_1_and_2(const lanczos27MP&) // Generated with compiler: Microsoft Visual C++ version 14.2 on Win32 at Oct 14 2019 // Type precision was 168 bits or 53 max_digits10 // -struct lanczos35MP : public std::integral_constant +struct lanczos35MP : public boost::math::integral_constant { template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[35] = { + BOOST_MATH_STATIC const T num[35] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 2.17215050716253100021302249837728942659410271586236104e+50)), 
static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 2.51055117651708470336913962553466820524801246971658127e+50)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 1.40813458996718289733677017073036013655624930344397267e+50)), @@ -1363,7 +1367,7 @@ struct lanczos35MP : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 2.50897418653428667959996348205296461689142907811767371e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 2.50662827463100050241576528481104525300698674060984055e+00)) }; - static const T denom[35] = { + BOOST_MATH_STATIC const T denom[35] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 0.00000000000000000000000000000000000000000000000000000e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 8.68331761881188649551819440128000000000000000000000000e+36)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 3.55043336733310191803732770947072000000000000000000000e+37)), @@ -1405,10 +1409,10 @@ struct lanczos35MP : public std::integral_constant } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[35] = { + BOOST_MATH_STATIC const T num[35] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 2.84421398435712762388902267099927585742388886580864424e+37)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 3.28731583799033736725852757551292030085556435695468295e+37)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 1.84381150359300352571680869181416248982215282642834936e+37)), @@ -1445,7 +1449,7 @@ struct lanczos35MP : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 3.28525092722679899458094768960179796663588010298597603e-10)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 3.28217919006153582429216342066702743329957749672852350e-13)) }; - static const T denom[35] = { + BOOST_MATH_STATIC const T denom[35] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 0.00000000000000000000000000000000000000000000000000000e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 8.68331761881188649551819440128000000000000000000000000e+36)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 3.55043336733310191803732770947072000000000000000000000e+37)), @@ -1488,10 +1492,10 @@ struct lanczos35MP : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[42] = { + BOOST_MATH_STATIC const T d[42] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 8.2258008829795701933757823508857131818190413131511363e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, -6.1680809698202901664719598422224259984110345848176138e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 2.0937956909159916126016144892534179459545368045658870e+02)), @@ -1545,10 +1549,10 @@ struct lanczos35MP : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[42] = { + BOOST_MATH_STATIC const T d[42] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 7.3782193657165970743894979068466124765194827248379940e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, -5.5325256602067816772285455933211570612342576586214891e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 1.8780522570799869937961476290263461833002660531646012e+03)), @@ -1602,10 +1606,10 @@ struct lanczos35MP : public std::integral_constant return result; } - static double g() { return 
2.96640371531248092651367187500000000000000000000000000e+01; } + BOOST_MATH_GPU_ENABLED static double g() { return 2.96640371531248092651367187500000000000000000000000000e+01; } }; -inline double lanczos_g_near_1_and_2(const lanczos35MP&) +BOOST_MATH_GPU_ENABLED inline double lanczos_g_near_1_and_2(const lanczos35MP&) { return 22.36563469469547; } @@ -1615,13 +1619,13 @@ inline double lanczos_g_near_1_and_2(const lanczos35MP&) // Generated with compiler: Microsoft Visual C++ version 14.2 on Win32 at Oct 14 2019 // Type precision was 201 bits or 63 max_digits10 // -struct lanczos48MP : public std::integral_constant +struct lanczos48MP : public boost::math::integral_constant { template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[48] = { + BOOST_MATH_STATIC const T num[48] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 5.761757987425932419978923296640371540367427757167447418730589877e+70)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 8.723233313564421930629677035555276136256253817229396631458438691e+70)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 6.460052620548943146316510839385235752729444155384745952604400014e+70)), @@ -1671,7 +1675,7 @@ struct lanczos48MP : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 3.749690888961891063146468955091435916957208840312184463551812828e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 2.506628274631000502415765284811045253006986740609938316629929233e+00)) }; - static const T denom[48] = { + BOOST_MATH_STATIC const T denom[48] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 0.000000000000000000000000000000000000000000000000000000000000000e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 5.502622159812088949850305428800254892961651752960000000000000000e+57)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 2.430336111272256671478593169569751383305061494947840000000000000e+58)), @@ -1726,10 +1730,10 @@ struct lanczos48MP : public std::integral_constant } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[48] = { + BOOST_MATH_STATIC const T num[48] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 1.775732062655417998910881298714821053061055705608286949609421120e+58)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 2.688437299644448784121592662352787426980194425446481703306505899e+58)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 1.990941408817264621124181941423397180231807676408175000011574647e+58)), @@ -1779,7 +1783,7 @@ struct lanczos48MP : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 1.155627562127299657410444702080985966726894475302009989071093439e-09)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 7.725246714864934496649491688787278190129598018071339049048385845e-13)) }; - static const T denom[48] = { + BOOST_MATH_STATIC const T denom[48] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 0.000000000000000000000000000000000000000000000000000000000000000e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 5.502622159812088949850305428800254892961651752960000000000000000e+57)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 2.430336111272256671478593169569751383305061494947840000000000000e+58)), @@ -1835,10 +1839,10 @@ struct lanczos48MP : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - 
static const T d[47] = { + BOOST_MATH_STATIC const T d[47] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 1.059629332377126683204423480567078764834299559082175332563440691e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, -1.045539783916612448318159279915745234781500064405838259582295756e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 4.784116147862702971548198855631720823614071322755242269800139953e+02)), @@ -1897,10 +1901,10 @@ struct lanczos48MP : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[47] = { + BOOST_MATH_STATIC const T d[47] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 1.201442621036266842137537764128372139686555918574926377003612763e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, -1.185467427150643969519910927764836582205108528009141221591420898e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 5.424388386017623557963301151646679462091516489317860889362683594e+03)), @@ -1959,7 +1963,7 @@ struct lanczos48MP : public std::integral_constant return result; } - static double g() { return 2.880805098265409469604492187500000000000000000000000000000000000e+01; } + BOOST_MATH_GPU_ENABLED static double g() { return 2.880805098265409469604492187500000000000000000000000000000000000e+01; } }; // // Lanczos Coefficients for N=49 G=3.531905273437499914734871708787977695465087890625000000000000000000000000e+01 @@ -1967,13 +1971,13 @@ struct lanczos48MP : public std::integral_constant // Generated with compiler: Microsoft Visual C++ version 14.2 on Win32 at May 23 2021 // Type precision was 234 bits or 72 max_digits10 // -struct lanczos49MP : public std::integral_constant +struct lanczos49MP : public boost::math::integral_constant { template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[49] = { + BOOST_MATH_STATIC const T num[49] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 2.019754080776483553135944314398390557182640085494778723336498544843678485e+75)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 2.676059842235360762770131859925648183945167646928679564649946220888559950e+75)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 1.735650057396761011129552305882284776566019938011364428733911563803428382e+75)), @@ -2024,7 +2028,7 @@ struct lanczos49MP : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 4.390800780998954208500039666019609185743083611214630479125238184115750385e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 2.506628274631000502415765284811045253006986740609938316629923576327386304e+00)) }; - static const T denom[49] = { + BOOST_MATH_STATIC const T denom[49] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 0.000000000000000000000000000000000000000000000000000000000000000000000000e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 2.586232415111681806429643551536119799691976323891200000000000000000000000e+59)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 1.147760594457772724544789095126583405046340554378444800000000000000000000e+60)), @@ -2080,10 +2084,10 @@ struct lanczos49MP : public std::integral_constant } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[49] = { + BOOST_MATH_STATIC const T num[49] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 
9.256115936295239128792053510340342045264892843178101822334871337037830072e+59)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 1.226382973449509462464247401218271019985727521806127065773488938845990367e+60)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 7.954125855720840120393676022050001333138789037332565663424594891457273557e+59)), @@ -2134,7 +2138,7 @@ struct lanczos49MP : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 2.012213341659767638341287600182102653785253052492980766472349845276996656e-12)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 1.148735984247176123115370642724455566337349193609892794757225210307646070e-15)) }; - static const T denom[49] = { + BOOST_MATH_STATIC const T denom[49] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 0.000000000000000000000000000000000000000000000000000000000000000000000000e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 2.586232415111681806429643551536119799691976323891200000000000000000000000e+59)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 1.147760594457772724544789095126583405046340554378444800000000000000000000e+60)), @@ -2191,10 +2195,10 @@ struct lanczos49MP : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[48] = { + BOOST_MATH_STATIC const T d[48] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 1.233965513689195496302526816415068018137532804347903252026160914018410959e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, -1.432567696701419045483804034990696504881298696037704685583731202573594084e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 7.800990151010204780591569831451389602736047219596430673280355834870101274e+02)), @@ -2254,10 +2258,10 @@ struct lanczos49MP : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[48] = { + BOOST_MATH_STATIC const T d[48] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 1.614127734928823683399031924928203896697519780457812139739363243361356121e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, -1.873915620620241270111954934939697069495813017577862172724257417200307532e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 1.020433263568799913803105156119729477192007677199414299858195073560627451e+04)), @@ -2317,10 +2321,10 @@ struct lanczos49MP : public std::integral_constant return result; } - static double g() { return 3.531905273437499914734871708787977695465087890625000000000000000000000000e+01; } + BOOST_MATH_GPU_ENABLED static double g() { return 3.531905273437499914734871708787977695465087890625000000000000000000000000e+01; } }; -inline double lanczos_g_near_1_and_2(const lanczos49MP&) +BOOST_MATH_GPU_ENABLED inline double lanczos_g_near_1_and_2(const lanczos49MP&) { return 33.54638671875000; } @@ -2331,13 +2335,13 @@ inline double lanczos_g_near_1_and_2(const lanczos49MP&) // Generated with compiler: Microsoft Visual C++ version 14.2 on Win32 at May 22 2021 // Type precision was 267 bits or 82 max_digits10 // -struct lanczos52MP : public std::integral_constant +struct lanczos52MP : public boost::math::integral_constant { template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[52] = { + BOOST_MATH_STATIC const T num[52] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 
6.2155666558597192337239536765115831322604714024167432764126799013946738944179064162e+86)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 6.4127424062560995063147129656553600039438028633959646865531341376543275935920940510e+86)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 3.2432219642804430367752303997394644425738553439619047355470691880100895245432999409e+86)), @@ -2391,7 +2395,7 @@ struct lanczos52MP : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 6.3192906485096381210566149918556620595525679738152760526187454875638091923687554946e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 2.5066282746310005024157652848110452530069867406099383166299235763422936546004304390e+00)) }; - static const T denom[52] = { + BOOST_MATH_STATIC const T denom[52] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 3.0414093201713378043612608166064768844377641568960512000000000000000000000000000000e+64)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 1.3683925049359750564345782687270252191318781054337155072000000000000000000000000000e+65)), @@ -2450,10 +2454,10 @@ struct lanczos52MP : public std::integral_constant } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[52] = { + BOOST_MATH_STATIC const T num[52] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 1.2968364952374867351881152115042817894191583875220489481700563388077315440993668645e+65)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 1.3379758994539627857606593702434364057385206718035611620158459666404856221820703129e+65)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 6.7667661507089657936560642518188013126674666141084536651063996312630940638352438169e+64)), @@ -2507,7 +2511,7 @@ struct lanczos52MP : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 1.3184778139696006596104645792244972612333458493576785210966728195969324996631733257e-18)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 5.2299125832253333486600023635817464870204660970908989075481425992405717273229096642e-22)) }; - static const T denom[52] = { + BOOST_MATH_STATIC const T denom[52] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 3.0414093201713378043612608166064768844377641568960512000000000000000000000000000000e+64)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 1.3683925049359750564345782687270252191318781054337155072000000000000000000000000000e+65)), @@ -2567,10 +2571,10 @@ struct lanczos52MP : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[56] = { + BOOST_MATH_STATIC const T d[56] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 1.4249481633301349696310814410227012806541100102720500928500445853537331413655453290e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, -1.9263209672927829270913652941762375058727326960303110137656951784697992824730035351e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 1.2326134462101140657073655882621393643823409472993225649429843685598155061860815843e+03)), @@ -2638,10 +2642,10 @@ struct lanczos52MP : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + 
+   BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz)
    {
       // LCOV_EXCL_START
-      static const T d[56] = {
+      BOOST_MATH_STATIC const T d[56] = {
         static_cast<T>(BOOST_MATH_BIG_CONSTANT(T, 267, 2.1359871474796665853092357455924330354587340093067807143261699873815704783987359772e+02)),
         static_cast<T>(BOOST_MATH_BIG_CONSTANT(T, 267, -2.8875414095359657817766255009397774415784763914903057809977502598124862632510767554e+03)),
         static_cast<T>(BOOST_MATH_BIG_CONSTANT(T, 267, 1.8476787764422274017528261804071971508619123082396685980448133660376964287516316704e+04)),
@@ -2709,10 +2713,10 @@ struct lanczos52MP : public std::integral_constant
       return result;
    }

-   static double g() { return 4.9921416015624998863131622783839702606201171875000000000000000000000000000000000000e+01; }
+   BOOST_MATH_GPU_ENABLED static double g() { return 4.9921416015624998863131622783839702606201171875000000000000000000000000000000000000e+01; }
 };

-inline double lanczos_g_near_1_and_2(const lanczos52MP&)
+BOOST_MATH_GPU_ENABLED inline double lanczos_g_near_1_and_2(const lanczos52MP&)
 {
    return 38.73733398437500;
 }
@@ -2721,24 +2725,24 @@ inline double lanczos_g_near_1_and_2(const lanczos52MP&)
 //
 // placeholder for no lanczos info available:
 //
-struct undefined_lanczos : public std::integral_constant<int, (std::numeric_limits<int>::max)() - 1> { };
+struct undefined_lanczos : public boost::math::integral_constant<int, (boost::math::numeric_limits<int>::max)() - 1> { };

 template <class Real, class Policy>
 struct lanczos
 {
-   static constexpr auto target_precision = policies::precision<Real, Policy>::type::value <= 0 ? (std::numeric_limits<int>::max)()-2 :
+   BOOST_MATH_STATIC constexpr auto target_precision = policies::precision<Real, Policy>::type::value <= 0 ? (boost::math::numeric_limits<int>::max)()-2 :
                                             policies::precision<Real, Policy>::type::value;

-   using type = typename std::conditional<(target_precision <= lanczos6m24::value), lanczos6m24,
-      typename std::conditional<(target_precision <= lanczos13m53::value), lanczos13m53,
-      typename std::conditional<(target_precision <= lanczos11::value), lanczos11,
-      typename std::conditional<(target_precision <= lanczos17m64::value), lanczos17m64,
-      typename std::conditional<(target_precision <= lanczos24m113::value), lanczos24m113,
-      typename std::conditional<(target_precision <= lanczos27MP::value), lanczos27MP,
-      typename std::conditional<(target_precision <= lanczos35MP::value), lanczos35MP,
-      typename std::conditional<(target_precision <= lanczos48MP::value), lanczos48MP,
-      typename std::conditional<(target_precision <= lanczos49MP::value), lanczos49MP,
-      typename std::conditional<(target_precision <= lanczos52MP::value), lanczos52MP, undefined_lanczos>::type
+   using type = typename boost::math::conditional<(target_precision <= lanczos6m24::value), lanczos6m24,
+      typename boost::math::conditional<(target_precision <= lanczos13m53::value), lanczos13m53,
+      typename boost::math::conditional<(target_precision <= lanczos11::value), lanczos11,
+      typename boost::math::conditional<(target_precision <= lanczos17m64::value), lanczos17m64,
+      typename boost::math::conditional<(target_precision <= lanczos24m113::value), lanczos24m113,
+      typename boost::math::conditional<(target_precision <= lanczos27MP::value), lanczos27MP,
+      typename boost::math::conditional<(target_precision <= lanczos35MP::value), lanczos35MP,
+      typename boost::math::conditional<(target_precision <= lanczos48MP::value), lanczos48MP,
+      typename boost::math::conditional<(target_precision <= lanczos49MP::value), lanczos49MP,
+      typename boost::math::conditional<(target_precision <= lanczos52MP::value), lanczos52MP, undefined_lanczos>::type
    >::type>::type>::type>::type>::type>::type>::type>::type
    >::type;
 };
@@ -2748,7 +2752,7 @@ struct lanczos
 } // namespace boost

 #if !defined(_CRAYC) && !defined(__CUDACC__) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3)))
-#if ((defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64)) && !defined(_MANAGED)
+#if ((defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64)) && !defined(_MANAGED) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
 #include <emmintrin.h>
 #endif
 #endif
diff --git a/include/boost/math/special_functions/log1p.hpp b/include/boost/math/special_functions/log1p.hpp
index 9b8a8e0eb7..758f606687 100644
--- a/include/boost/math/special_functions/log1p.hpp
+++ b/include/boost/math/special_functions/log1p.hpp
@@ -12,13 +12,14 @@
 #pragma warning(disable:4702) // Unreachable code (release mode only warning)
 #endif

-#include
-#include
-#include
 #include
 #include
 #include
 #include
+#include
+#include
+#include
+#include
 #include
 #include
 #include
@@ -47,16 +48,16 @@ namespace detail
    typedef T result_type;

-   log1p_series(T x)
+   BOOST_MATH_GPU_ENABLED log1p_series(T x)
       : k(0), m_mult(-x), m_prod(-1){}

-   T operator()()
+   BOOST_MATH_GPU_ENABLED T operator()()
    {
       m_prod *= m_mult;
       return m_prod / ++k;
    }

-   int count()const
+   BOOST_MATH_GPU_ENABLED int count()const
    {
       return k;
    }
@@ -79,12 +80,12 @@ namespace detail
 // it performs no better than log(1+x): which is to say not very well at all.
 //
 template <class T, class Policy>
-T log1p_imp(T const & x, const Policy& pol, const std::integral_constant<int, 0>&)
+BOOST_MATH_GPU_ENABLED T log1p_imp(T const & x, const Policy& pol, const boost::math::integral_constant<int, 0>&)
 { // The function returns the natural logarithm of 1 + x.
    typedef typename tools::promote_args<T>::type result_type;
    BOOST_MATH_STD_USING
-   static const char* function = "boost::math::log1p<%1%>(%1%)";
+   constexpr auto function = "boost::math::log1p<%1%>(%1%)";

    if((x < -1) || (boost::math::isnan)(x))
       return policies::raise_domain_error<T>(
@@ -101,7 +102,7 @@
    if(a < tools::epsilon<result_type>())
       return x;
    detail::log1p_series<result_type> s(x);
-   std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
+   boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();

    result_type result = tools::sum_series(s, policies::get_epsilon<result_type, Policy>(), max_iter);

@@ -110,11 +111,11 @@
 }
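[Reviewer's aside, not part of the patch: log1p_series above generates the terms of log(1+x) = x - x^2/2 + x^3/3 - ..., and tools::sum_series adds terms until one drops below the requested epsilon. A minimal standalone sketch of that loop in plain C++ (a hypothetical helper, not the library's actual summation code, which also enforces the max-iteration policy shown in the hunk above):]

#include <cmath>
#include <cstdint>

// Illustration only: sums log(1+x) = x - x^2/2 + x^3/3 - ... for |x| < 1.
double log1p_by_series(double x, double eps = 1e-16, std::uintmax_t max_iter = 1000)
{
   double mult = -x, prod = -1.0, sum = 0.0;
   for (std::uintmax_t k = 1; k <= max_iter; ++k)
   {
      prod *= mult;                               // prod is now -(-x)^k
      double term = prod / static_cast<double>(k); // k-th series term
      sum += term;
      if (std::fabs(term) < eps * std::fabs(sum))
         break;                                   // same stopping idea as tools::sum_series
   }
   return sum;
}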
 template <class T, class Policy>
-T log1p_imp(T const& x, const Policy& pol, const std::integral_constant<int, 53>&)
+BOOST_MATH_GPU_ENABLED T log1p_imp(T const& x, const Policy& pol, const boost::math::integral_constant<int, 53>&)
 { // The function returns the natural logarithm of 1 + x.
    BOOST_MATH_STD_USING
-   static const char* function = "boost::math::log1p<%1%>(%1%)";
+   constexpr auto function = "boost::math::log1p<%1%>(%1%)";

    if(x < -1)
       return policies::raise_domain_error<T>(
@@ -135,7 +136,7 @@ T log1p_imp(T const& x, const Policy& pol, const std::integral_constant
    // Expected Error Term:                         1.843e-017
    // Maximum Relative Change in Control Points:   8.138e-004
    // Max Error found at double precision =        3.250766e-016
-   static const T P[] = {
+   BOOST_MATH_STATIC const T P[] = {
       static_cast<T>(0.15141069795941984e-16L),
       static_cast<T>(0.35495104378055055e-15L),
       static_cast<T>(0.33333333333332835L),
@@ -145,7 +146,7 @@ T log1p_imp(T const& x, const Policy& pol, const std::integral_constant
       static_cast<T>(0.13703234928513215L),
       static_cast<T>(0.011294864812099712L)
    };
-   static const T Q[] = {
+   BOOST_MATH_STATIC const T Q[] = {
       static_cast<T>(1L),
       static_cast<T>(3.7274719063011499L),
       static_cast<T>(5.5387948649720334L),
@@ -163,11 +164,11 @@ T log1p_imp(T const& x, const Policy& pol, const std::integral_constant
 }

 template <class T, class Policy>
-T log1p_imp(T const& x, const Policy& pol, const std::integral_constant<int, 64>&)
+BOOST_MATH_GPU_ENABLED T log1p_imp(T const& x, const Policy& pol, const boost::math::integral_constant<int, 64>&)
 { // The function returns the natural logarithm of 1 + x.
    BOOST_MATH_STD_USING
-   static const char* function = "boost::math::log1p<%1%>(%1%)";
+   constexpr auto function = "boost::math::log1p<%1%>(%1%)";

    if(x < -1)
       return policies::raise_domain_error<T>(
@@ -188,7 +189,7 @@ T log1p_imp(T const& x, const Policy& pol, const std::integral_constant
    // Expected Error Term:                         8.088e-20
    // Maximum Relative Change in Control Points:   9.648e-05
    // Max Error found at long double precision =   2.242324e-19
-   static const T P[] = {
+   BOOST_MATH_STATIC const T P[] = {
      BOOST_MATH_BIG_CONSTANT(T, 64, -0.807533446680736736712e-19),
      BOOST_MATH_BIG_CONSTANT(T, 64, -0.490881544804798926426e-18),
      BOOST_MATH_BIG_CONSTANT(T, 64, 0.333333333333333373941),
@@ -199,7 +200,7 @@ T log1p_imp(T const& x, const Policy& pol, const std::integral_constant
      BOOST_MATH_BIG_CONSTANT(T, 64, 0.0706537026422828914622),
      BOOST_MATH_BIG_CONSTANT(T, 64, 0.00441709903782239229447)
    };
-   static const T Q[] = {
+   BOOST_MATH_STATIC const T Q[] = {
      BOOST_MATH_BIG_CONSTANT(T, 64, 1.0),
      BOOST_MATH_BIG_CONSTANT(T, 64, 4.26423872346263928361),
      BOOST_MATH_BIG_CONSTANT(T, 64, 7.48189472704477708962),
@@ -218,11 +219,11 @@ T log1p_imp(T const& x, const Policy& pol, const std::integral_constant
 }

 template <class T, class Policy>
-T log1p_imp(T const& x, const Policy& pol, const std::integral_constant<int, 24>&)
+BOOST_MATH_GPU_ENABLED T log1p_imp(T const& x, const Policy& pol, const boost::math::integral_constant<int, 24>&)
 { // The function returns the natural logarithm of 1 + x.
    BOOST_MATH_STD_USING
-   static const char* function = "boost::math::log1p<%1%>(%1%)";
+   constexpr auto function = "boost::math::log1p<%1%>(%1%)";

    if(x < -1)
       return policies::raise_domain_error<T>(
@@ -244,13 +245,13 @@
    // Maximum Relative Change in Control Points:   2.509e-04
    // Max Error found at double precision =        6.910422e-08
    // Max Error found at float precision =         8.357242e-08
-   static const T P[] = {
+   BOOST_MATH_STATIC const T P[] = {
      -0.671192866803148236519e-7L,
      0.119670999140731844725e-6L,
      0.333339469182083148598L,
      0.237827183019664122066L
    };
-   static const T Q[] = {
+   BOOST_MATH_STATIC const T Q[] = {
      1L,
      1.46348272586988539733L,
      0.497859871350117338894L,
@@ -268,22 +269,24 @@ struct log1p_initializer
 {
    struct init
    {
-      init()
+      BOOST_MATH_GPU_ENABLED init()
      {
         do_init(tag());
      }
      template <int N>
-      static void do_init(const std::integral_constant<int, N>&){}
-      static void do_init(const std::integral_constant<int, 64>&)
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, N>&){}
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 64>&)
      {
         boost::math::log1p(static_cast<T>(0.25), Policy());
      }
-      void force_instantiate()const{}
+      BOOST_MATH_GPU_ENABLED void force_instantiate()const{}
    };
-   static const init initializer;
-   static void force_instantiate()
+   BOOST_MATH_STATIC const init initializer;
+   BOOST_MATH_GPU_ENABLED static void force_instantiate()
    {
+      #ifndef BOOST_MATH_HAS_GPU_SUPPORT
      initializer.force_instantiate();
+      #endif
    }
 };

@@ -294,7 +297,7 @@ const typename log1p_initializer::init log1p_initializer

 template <class T, class Policy>
-inline typename tools::promote_args<T>::type log1p(T x, const Policy&)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type log1p(T x, const Policy&)
 {
    typedef typename tools::promote_args<T>::type result_type;
    typedef typename policies::evaluation<result_type, Policy>::type value_type;
@@ -306,7 +309,7 @@ inline typename tools::promote_args::type log1p(T x, const Policy&)
       policies::discrete_quantile<>,
       policies::assert_undefined<> >::type forwarding_policy;

-   typedef std::integral_constant<int,
 ...
@@ ... @@ inline typename tools::promote_args::type log1p(T x, const Policy&)

 #if defined(BOOST_HAS_LOG1P) && !(defined(__osf__) && defined(__DECCXX_VER))
 #  ifdef BOOST_MATH_USE_C99
 template <class Policy>
-inline float log1p(float x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline float log1p(float x, const Policy& pol)
 {
    if(x < -1)
       return policies::raise_domain_error<float>(
@@ -340,7 +343,7 @@ inline float log1p(float x, const Policy& pol)
 }
 #ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
 template <class Policy>
-inline long double log1p(long double x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline long double log1p(long double x, const Policy& pol)
 {
    if(x < -1)
       return policies::raise_domain_error<long double>(
@@ -365,7 +368,7 @@ inline float log1p(float x, const Policy& pol)
 }
 #endif
 template <class Policy>
-inline double log1p(double x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline double log1p(double x, const Policy& pol)
 {
    if(x < -1)
       return policies::raise_domain_error<double>(
@@ -425,7 +428,7 @@ inline long double log1p(long double x, const Policy& pol)
 #endif

 template <class T>
-inline typename tools::promote_args<T>::type log1p(T x)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type log1p(T x)
 {
    return boost::math::log1p(x, policies::policy<>());
 }
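[Reviewer's aside, not part of the patch: a short host-side usage sketch of the public overloads touched above, assuming a standard Boost.Math installation. It shows why log1p exists at all: for tiny x, 1 + x rounds to 1 and the naive formula loses everything.]

#include <boost/math/special_functions/log1p.hpp>
#include <boost/math/policies/policy.hpp>
#include <cmath>
#include <iostream>

int main()
{
   double x = 1e-17;
   std::cout << std::log(1.0 + x) << '\n';      // 1 + x rounds to 1, prints 0
   std::cout << boost::math::log1p(x) << '\n';  // full precision: ~1e-17

   // The Policy overload is the one BOOST_MATH_GPU_ENABLED now also exposes to device code:
   std::cout << boost::math::log1p(x, boost::math::policies::policy<>()) << '\n';
}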
@@ -433,12 +436,12 @@ inline typename tools::promote_args::type log1p(T x)
 //
 // Compute log(1+x)-x:
 //
 template <class T, class Policy>
-inline typename tools::promote_args<T>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
    log1pmx(T x, const Policy& pol)
 {
    typedef typename tools::promote_args<T>::type result_type;
    BOOST_MATH_STD_USING
-   static const char* function = "boost::math::log1pmx<%1%>(%1%)";
+   constexpr auto function = "boost::math::log1pmx<%1%>(%1%)";

    if(x < -1)
       return policies::raise_domain_error<T>(
@@ -456,7 +459,7 @@ inline typename tools::promote_args::type
       return -x * x / 2;
    boost::math::detail::log1p_series<T> s(x);
    s();
-   std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
+   boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();

    T result = boost::math::tools::sum_series(s, policies::get_epsilon<T, Policy>(), max_iter);

@@ -465,7 +468,7 @@ inline typename tools::promote_args::type
 }

 template <class T>
-inline typename tools::promote_args<T>::type log1pmx(T x)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type log1pmx(T x)
 {
    return log1pmx(x, policies::policy<>());
 }
diff --git a/include/boost/math/special_functions/math_fwd.hpp b/include/boost/math/special_functions/math_fwd.hpp
index 3e5d6a7625..91dde74ccc 100644
--- a/include/boost/math/special_functions/math_fwd.hpp
+++ b/include/boost/math/special_functions/math_fwd.hpp
@@ -4,6 +4,7 @@
 // Copyright Paul A. Bristow 2006.
 // Copyright John Maddock 2006.
+// Copyright Matt Borland 2024

 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
@@ -23,11 +24,91 @@
 #pragma once
 #endif

+#include
+#include <boost/math/tools/promotion.hpp> // for argument promotion.
+#include
+#include
+#include
+
+#ifdef BOOST_MATH_HAS_NVRTC
+
+namespace boost {
+namespace math {
+
+template <class RT1, class RT2, class A>
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<RT1, RT2, A>::type
+   beta(RT1 a, RT2 b, A arg);
+
+namespace detail{
+
+   template <class T1, class T2, class T3>
+   struct ellint_3_result
+   {
+      using type = typename boost::math::conditional<
+         policies::is_policy<T3>::value,
+         tools::promote_args_t<T1, T2>,
+         tools::promote_args_t<T1, T2, T3>
+      >::type;
+   };
+
+   template <class T, class U>
+   struct expint_result
+   {
+      using type = typename boost::math::conditional<
+         policies::is_policy<U>::value,
+         tools::promote_args_t<T>,
+         typename tools::promote_args<U>::type
+      >::type;
+   };
+
+   typedef boost::math::integral_constant<int, 0> bessel_no_int_tag;    // No integer optimisation possible.
+   typedef boost::math::integral_constant<int, 1> bessel_maybe_int_tag; // Maybe integer optimisation.
+   typedef boost::math::integral_constant<int, 2> bessel_int_tag;       // Definite integer optimisation.
+
+   template <class T1, class T2, class Policy>
+   struct bessel_traits
+   {
+      using result_type = typename boost::math::conditional<
+         boost::math::is_integral<T1>::value,
+         typename tools::promote_args<T2>::type,
+         tools::promote_args_t<T1, T2>
+      >::type;
+
+      typedef typename policies::precision<result_type, Policy>::type precision_type;
+
+      using optimisation_tag = typename boost::math::conditional<
+         (precision_type::value <= 0 || precision_type::value > 64),
+         bessel_no_int_tag,
+         typename boost::math::conditional<
+            boost::math::is_integral<T1>::value,
+            bessel_int_tag,
+            bessel_maybe_int_tag
+         >::type
+      >::type;
+
+      using optimisation_tag128 = typename boost::math::conditional<
+         (precision_type::value <= 0 || precision_type::value > 113),
+         bessel_no_int_tag,
+         typename boost::math::conditional<
+            boost::math::is_integral<T1>::value,
+            bessel_int_tag,
+            bessel_maybe_int_tag
+         >::type
+      >::type;
+   };
+
+} // namespace detail
+
+} // namespace math
+} // namespace boost
+
+#else
+
 #include
 #include
 #include
 #include
-#include <boost/math/tools/promotion.hpp> // for argument promotion.
+#include
 #include

 #define BOOST_NO_MACRO_EXPAND /**/
@@ -39,139 +120,139 @@ namespace boost

   // Beta functions.
   template <class RT1, class RT2>
-   tools::promote_args_t<RT1, RT2>
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<RT1, RT2>
         beta(RT1 a, RT2 b); // Beta function (2 arguments).
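[Reviewer's aside, not part of the patch: to make the tag dispatch in bessel_traits above concrete, here is a compile-time sketch of the same selection rule with simplified names. This is an illustration, not the library's API; an integral order type selects the "int" optimisation tag only while the target precision fits in 64 bits.]

#include <type_traits>

template <class T, int Precision>
using optimisation_tag = typename std::conditional<
   (Precision <= 0 || Precision > 64),
   std::integral_constant<int, 0>,               // bessel_no_int_tag
   typename std::conditional<
      std::is_integral<T>::value,
      std::integral_constant<int, 2>,            // bessel_int_tag
      std::integral_constant<int, 1>             // bessel_maybe_int_tag
   >::type
>::type;

static_assert(optimisation_tag<int, 53>::value == 2,     "integral order -> int tag");
static_assert(optimisation_tag<double, 53>::value == 1,  "real order -> maybe-int tag");
static_assert(optimisation_tag<double, 113>::value == 0, "too precise -> no-int tag");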
template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t beta(RT1 a, RT2 b, A x); // Beta function (3 arguments). template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t beta(RT1 a, RT2 b, RT3 x, const Policy& pol); // Beta function (3 arguments). template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t betac(RT1 a, RT2 b, RT3 x); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t betac(RT1 a, RT2 b, RT3 x, const Policy& pol); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta(RT1 a, RT2 b, RT3 x); // Incomplete beta function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta(RT1 a, RT2 b, RT3 x, const Policy& pol); // Incomplete beta function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibetac(RT1 a, RT2 b, RT3 x); // Incomplete beta complement function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibetac(RT1 a, RT2 b, RT3 x, const Policy& pol); // Incomplete beta complement function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta_inv(T1 a, T2 b, T3 p, T4* py); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta_inv(T1 a, T2 b, T3 p, T4* py, const Policy& pol); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta_inv(RT1 a, RT2 b, RT3 p); // Incomplete beta inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta_inv(RT1 a, RT2 b, RT3 p, const Policy&); // Incomplete beta inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta_inva(RT1 a, RT2 b, RT3 p); // Incomplete beta inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta_inva(RT1 a, RT2 b, RT3 p, const Policy&); // Incomplete beta inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta_invb(RT1 a, RT2 b, RT3 p); // Incomplete beta inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta_invb(RT1 a, RT2 b, RT3 p, const Policy&); // Incomplete beta inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibetac_inv(T1 a, T2 b, T3 q, T4* py); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibetac_inv(T1 a, T2 b, T3 q, T4* py, const Policy& pol); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibetac_inv(RT1 a, RT2 b, RT3 q); // Incomplete beta complement inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibetac_inv(RT1 a, RT2 b, RT3 q, const Policy&); // Incomplete beta complement inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibetac_inva(RT1 a, RT2 b, RT3 q); // Incomplete beta complement inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibetac_inva(RT1 a, RT2 b, RT3 q, const Policy&); // Incomplete beta complement inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibetac_invb(RT1 a, RT2 b, RT3 q); // Incomplete beta complement inverse function. 
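[Reviewer's aside, not part of the patch: the incomplete-beta family above is forward-declared in both a policy and a non-policy form. A quick host-side round trip shows how the function/inverse pairs are meant to be used (hedged example; assumes Boost.Math is installed):]

#include <boost/math/special_functions/beta.hpp>
#include <iostream>

int main()
{
   double a = 2.0, b = 3.0, x = 0.4;
   double p = boost::math::ibeta(a, b, x);     // regularised incomplete beta I_x(a, b)
   double y = boost::math::ibeta_inv(a, b, p); // inverse recovers x
   std::cout << p << ' ' << y << '\n';         // y ~ 0.4
}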
template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibetac_invb(RT1 a, RT2 b, RT3 q, const Policy&); // Incomplete beta complement inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta_derivative(RT1 a, RT2 b, RT3 x); // derivative of incomplete beta template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta_derivative(RT1 a, RT2 b, RT3 x, const Policy& pol); // derivative of incomplete beta // Binomial: template - T binomial_coefficient(unsigned n, unsigned k, const Policy& pol); + BOOST_MATH_GPU_ENABLED T binomial_coefficient(unsigned n, unsigned k, const Policy& pol); template - T binomial_coefficient(unsigned n, unsigned k); + BOOST_MATH_GPU_ENABLED T binomial_coefficient(unsigned n, unsigned k); // erf & erfc error functions. template // Error function. - tools::promote_args_t erf(RT z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t erf(RT z); template // Error function. - tools::promote_args_t erf(RT z, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t erf(RT z, const Policy&); template // Error function complement. - tools::promote_args_t erfc(RT z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t erfc(RT z); template // Error function complement. - tools::promote_args_t erfc(RT z, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t erfc(RT z, const Policy&); template // Error function inverse. - tools::promote_args_t erf_inv(RT z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t erf_inv(RT z); template // Error function inverse. - tools::promote_args_t erf_inv(RT z, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t erf_inv(RT z, const Policy& pol); template // Error function complement inverse. - tools::promote_args_t erfc_inv(RT z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t erfc_inv(RT z); template // Error function complement inverse. 
- tools::promote_args_t erfc_inv(RT z, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t erfc_inv(RT z, const Policy& pol); // Polynomials: template @@ -250,15 +331,15 @@ namespace boost laguerre(unsigned n, T1 m, T2 x); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t hermite(unsigned n, T x); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t hermite(unsigned n, T x, const Policy& pol); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t hermite_next(unsigned n, T1 x, T2 Hn, T3 Hnm1); template @@ -311,90 +392,90 @@ namespace boost // Elliptic integrals: template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_rf(T1 x, T2 y, T3 z); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_rf(T1 x, T2 y, T3 z, const Policy& pol); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_rd(T1 x, T2 y, T3 z); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_rd(T1 x, T2 y, T3 z, const Policy& pol); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_rc(T1 x, T2 y); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_rc(T1 x, T2 y, const Policy& pol); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_rj(T1 x, T2 y, T3 z, T4 p); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_rj(T1 x, T2 y, T3 z, T4 p, const Policy& pol); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_rg(T1 x, T2 y, T3 z); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_rg(T1 x, T2 y, T3 z, const Policy& pol); template - tools::promote_args_t ellint_2(T k); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_2(T k); template - tools::promote_args_t ellint_2(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_2(T1 k, T2 phi); template - tools::promote_args_t ellint_2(T1 k, T2 phi, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_2(T1 k, T2 phi, const Policy& pol); template - tools::promote_args_t ellint_1(T k); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_1(T k); template - tools::promote_args_t ellint_1(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_1(T1 k, T2 phi); template - tools::promote_args_t ellint_1(T1 k, T2 phi, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_1(T1 k, T2 phi, const Policy& pol); template - tools::promote_args_t ellint_d(T k); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_d(T k); template - tools::promote_args_t ellint_d(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_d(T1 k, T2 phi); template - tools::promote_args_t ellint_d(T1 k, T2 phi, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_d(T1 k, T2 phi, const Policy& pol); template - tools::promote_args_t jacobi_zeta(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED tools::promote_args_t jacobi_zeta(T1 k, T2 phi); template - tools::promote_args_t jacobi_zeta(T1 k, T2 phi, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t jacobi_zeta(T1 k, T2 phi, const Policy& pol); template - tools::promote_args_t heuman_lambda(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED tools::promote_args_t heuman_lambda(T1 k, T2 phi); template - tools::promote_args_t 
heuman_lambda(T1 k, T2 phi, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t heuman_lambda(T1 k, T2 phi, const Policy& pol); namespace detail{ template struct ellint_3_result { - using type = typename std::conditional< + using type = typename boost::math::conditional< policies::is_policy::value, tools::promote_args_t, tools::promote_args_t @@ -405,28 +486,28 @@ namespace boost template - typename detail::ellint_3_result::type ellint_3(T1 k, T2 v, T3 phi); + BOOST_MATH_GPU_ENABLED typename detail::ellint_3_result::type ellint_3(T1 k, T2 v, T3 phi); template - tools::promote_args_t ellint_3(T1 k, T2 v, T3 phi, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_3(T1 k, T2 v, T3 phi, const Policy& pol); template - tools::promote_args_t ellint_3(T1 k, T2 v); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_3(T1 k, T2 v); // Factorial functions. // Note: not for integral types, at present. template struct max_factorial; template - RT factorial(unsigned int); + BOOST_MATH_GPU_ENABLED RT factorial(unsigned int); template - RT factorial(unsigned int, const Policy& pol); + BOOST_MATH_GPU_ENABLED RT factorial(unsigned int, const Policy& pol); template - RT unchecked_factorial(unsigned int BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(RT)); + BOOST_MATH_GPU_ENABLED RT unchecked_factorial(unsigned int BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(RT)); template - RT double_factorial(unsigned i); + BOOST_MATH_GPU_ENABLED RT double_factorial(unsigned i); template - RT double_factorial(unsigned i, const Policy& pol); + BOOST_MATH_GPU_ENABLED RT double_factorial(unsigned i, const Policy& pol); template tools::promote_args_t falling_factorial(RT x, unsigned n); @@ -442,106 +523,106 @@ namespace boost // Gamma functions. template - tools::promote_args_t tgamma(RT z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma(RT z); template - tools::promote_args_t tgamma1pm1(RT z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma1pm1(RT z); template - tools::promote_args_t tgamma1pm1(RT z, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma1pm1(RT z, const Policy& pol); template - tools::promote_args_t tgamma(RT1 a, RT2 z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma(RT1 a, RT2 z); template - tools::promote_args_t tgamma(RT1 a, RT2 z, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma(RT1 a, RT2 z, const Policy& pol); template - tools::promote_args_t lgamma(RT z, int* sign); + BOOST_MATH_GPU_ENABLED tools::promote_args_t lgamma(RT z, int* sign); template - tools::promote_args_t lgamma(RT z, int* sign, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t lgamma(RT z, int* sign, const Policy& pol); template - tools::promote_args_t lgamma(RT x); + BOOST_MATH_GPU_ENABLED tools::promote_args_t lgamma(RT x); template - tools::promote_args_t lgamma(RT x, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t lgamma(RT x, const Policy& pol); template - tools::promote_args_t tgamma_lower(RT1 a, RT2 z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma_lower(RT1 a, RT2 z); template - tools::promote_args_t tgamma_lower(RT1 a, RT2 z, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma_lower(RT1 a, RT2 z, const Policy&); template - tools::promote_args_t gamma_q(RT1 a, RT2 z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_q(RT1 a, RT2 z); template - tools::promote_args_t gamma_q(RT1 a, RT2 z, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_q(RT1 a, RT2 z, 
const Policy&); template - tools::promote_args_t gamma_p(RT1 a, RT2 z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_p(RT1 a, RT2 z); template - tools::promote_args_t gamma_p(RT1 a, RT2 z, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_p(RT1 a, RT2 z, const Policy&); template - tools::promote_args_t tgamma_delta_ratio(T1 z, T2 delta); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma_delta_ratio(T1 z, T2 delta); template - tools::promote_args_t tgamma_delta_ratio(T1 z, T2 delta, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma_delta_ratio(T1 z, T2 delta, const Policy&); template - tools::promote_args_t tgamma_ratio(T1 a, T2 b); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma_ratio(T1 a, T2 b); template - tools::promote_args_t tgamma_ratio(T1 a, T2 b, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma_ratio(T1 a, T2 b, const Policy&); template - tools::promote_args_t gamma_p_derivative(T1 a, T2 x); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_p_derivative(T1 a, T2 x); template - tools::promote_args_t gamma_p_derivative(T1 a, T2 x, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_p_derivative(T1 a, T2 x, const Policy&); // gamma inverse. template - tools::promote_args_t gamma_p_inv(T1 a, T2 p); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_p_inv(T1 a, T2 p); template - tools::promote_args_t gamma_p_inva(T1 a, T2 p, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_p_inva(T1 a, T2 p, const Policy&); template - tools::promote_args_t gamma_p_inva(T1 a, T2 p); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_p_inva(T1 a, T2 p); template - tools::promote_args_t gamma_p_inv(T1 a, T2 p, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_p_inv(T1 a, T2 p, const Policy&); template - tools::promote_args_t gamma_q_inv(T1 a, T2 q); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_q_inv(T1 a, T2 q); template - tools::promote_args_t gamma_q_inv(T1 a, T2 q, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_q_inv(T1 a, T2 q, const Policy&); template - tools::promote_args_t gamma_q_inva(T1 a, T2 q); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_q_inva(T1 a, T2 q); template - tools::promote_args_t gamma_q_inva(T1 a, T2 q, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_q_inva(T1 a, T2 q, const Policy&); // digamma: template - tools::promote_args_t digamma(T x); + BOOST_MATH_GPU_ENABLED tools::promote_args_t digamma(T x); template - tools::promote_args_t digamma(T x, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t digamma(T x, const Policy&); // trigamma: template - tools::promote_args_t trigamma(T x); + BOOST_MATH_GPU_ENABLED tools::promote_args_t trigamma(T x); template - tools::promote_args_t trigamma(T x, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t trigamma(T x, const Policy&); // polygamma: template @@ -552,63 +633,63 @@ namespace boost // Hypotenuse function sqrt(x ^ 2 + y ^ 2). template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t hypot(T1 x, T2 y); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t hypot(T1 x, T2 y, const Policy&); // cbrt - cube root. 
template - tools::promote_args_t cbrt(RT z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t cbrt(RT z); template - tools::promote_args_t cbrt(RT z, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t cbrt(RT z, const Policy&); // log1p is log(x + 1) template - tools::promote_args_t log1p(T); + BOOST_MATH_GPU_ENABLED tools::promote_args_t log1p(T); template - tools::promote_args_t log1p(T, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t log1p(T, const Policy&); // log1pmx is log(x + 1) - x template - tools::promote_args_t log1pmx(T); + BOOST_MATH_GPU_ENABLED tools::promote_args_t log1pmx(T); template - tools::promote_args_t log1pmx(T, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t log1pmx(T, const Policy&); // Exp (x) minus 1 functions. template - tools::promote_args_t expm1(T); + BOOST_MATH_GPU_ENABLED tools::promote_args_t expm1(T); template - tools::promote_args_t expm1(T, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t expm1(T, const Policy&); // Power - 1 template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t powm1(const T1 a, const T2 z); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t powm1(const T1 a, const T2 z, const Policy&); // sqrt(1+x) - 1 template - tools::promote_args_t sqrt1pm1(const T& val); + BOOST_MATH_GPU_ENABLED tools::promote_args_t sqrt1pm1(const T& val); template - tools::promote_args_t sqrt1pm1(const T& val, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t sqrt1pm1(const T& val, const Policy&); // sinus cardinals: template - tools::promote_args_t sinc_pi(T x); + BOOST_MATH_GPU_ENABLED tools::promote_args_t sinc_pi(T x); template - tools::promote_args_t sinc_pi(T x, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t sinc_pi(T x, const Policy&); template tools::promote_args_t sinhc_pi(T x); @@ -630,43 +711,43 @@ namespace boost tools::promote_args_t acosh(T x, const Policy&); template - tools::promote_args_t atanh(T x); + BOOST_MATH_GPU_ENABLED tools::promote_args_t atanh(T x); template - tools::promote_args_t atanh(T x, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t atanh(T x, const Policy&); namespace detail{ - typedef std::integral_constant bessel_no_int_tag; // No integer optimisation possible. - typedef std::integral_constant bessel_maybe_int_tag; // Maybe integer optimisation. - typedef std::integral_constant bessel_int_tag; // Definite integer optimisation. + typedef boost::math::integral_constant bessel_no_int_tag; // No integer optimisation possible. + typedef boost::math::integral_constant bessel_maybe_int_tag; // Maybe integer optimisation. + typedef boost::math::integral_constant bessel_int_tag; // Definite integer optimisation. 
template struct bessel_traits { - using result_type = typename std::conditional< - std::is_integral::value, + using result_type = typename boost::math::conditional< + boost::math::is_integral::value, typename tools::promote_args::type, tools::promote_args_t >::type; typedef typename policies::precision::type precision_type; - using optimisation_tag = typename std::conditional< + using optimisation_tag = typename boost::math::conditional< (precision_type::value <= 0 || precision_type::value > 64), bessel_no_int_tag, - typename std::conditional< - std::is_integral::value, + typename boost::math::conditional< + boost::math::is_integral::value, bessel_int_tag, bessel_maybe_int_tag >::type >::type; - using optimisation_tag128 = typename std::conditional< + using optimisation_tag128 = typename boost::math::conditional< (precision_type::value <= 0 || precision_type::value > 113), bessel_no_int_tag, - typename std::conditional< - std::is_integral::value, + typename boost::math::conditional< + boost::math::is_integral::value, bessel_int_tag, bessel_maybe_int_tag >::type @@ -676,223 +757,225 @@ namespace boost // Bessel functions: template - typename detail::bessel_traits::result_type cyl_bessel_j(T1 v, T2 x, const Policy& pol); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits::result_type cyl_bessel_j(T1 v, T2 x, const Policy& pol); template - typename detail::bessel_traits::result_type cyl_bessel_j_prime(T1 v, T2 x, const Policy& pol); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits::result_type cyl_bessel_j_prime(T1 v, T2 x, const Policy& pol); template - typename detail::bessel_traits >::result_type cyl_bessel_j(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits >::result_type cyl_bessel_j(T1 v, T2 x); template - typename detail::bessel_traits >::result_type cyl_bessel_j_prime(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits >::result_type cyl_bessel_j_prime(T1 v, T2 x); template - typename detail::bessel_traits::result_type sph_bessel(unsigned v, T x, const Policy& pol); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits::result_type sph_bessel(unsigned v, T x, const Policy& pol); template - typename detail::bessel_traits::result_type sph_bessel_prime(unsigned v, T x, const Policy& pol); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits::result_type sph_bessel_prime(unsigned v, T x, const Policy& pol); template - typename detail::bessel_traits >::result_type sph_bessel(unsigned v, T x); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits >::result_type sph_bessel(unsigned v, T x); template - typename detail::bessel_traits >::result_type sph_bessel_prime(unsigned v, T x); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits >::result_type sph_bessel_prime(unsigned v, T x); template - typename detail::bessel_traits::result_type cyl_bessel_i(T1 v, T2 x, const Policy& pol); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits::result_type cyl_bessel_i(T1 v, T2 x, const Policy& pol); template - typename detail::bessel_traits::result_type cyl_bessel_i_prime(T1 v, T2 x, const Policy& pol); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits::result_type cyl_bessel_i_prime(T1 v, T2 x, const Policy& pol); template - typename detail::bessel_traits >::result_type cyl_bessel_i(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits >::result_type cyl_bessel_i(T1 v, T2 x); template - typename detail::bessel_traits >::result_type cyl_bessel_i_prime(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits 
   template <class T1, class T2, class Policy>
-   typename detail::bessel_traits<T1, T2, Policy>::result_type cyl_bessel_k(T1 v, T2 x, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T1, T2, Policy>::result_type cyl_bessel_k(T1 v, T2 x, const Policy& pol);
   template <class T1, class T2, class Policy>
-   typename detail::bessel_traits<T1, T2, Policy>::result_type cyl_bessel_k_prime(T1 v, T2 x, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T1, T2, Policy>::result_type cyl_bessel_k_prime(T1 v, T2 x, const Policy& pol);

   template <class T1, class T2>
-   typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type cyl_bessel_k(T1 v, T2 x);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type cyl_bessel_k(T1 v, T2 x);
   template <class T1, class T2>
-   typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type cyl_bessel_k_prime(T1 v, T2 x);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type cyl_bessel_k_prime(T1 v, T2 x);

   template <class T1, class T2, class Policy>
-   typename detail::bessel_traits<T1, T2, Policy>::result_type cyl_neumann(T1 v, T2 x, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T1, T2, Policy>::result_type cyl_neumann(T1 v, T2 x, const Policy& pol);
   template <class T1, class T2, class Policy>
-   typename detail::bessel_traits<T1, T2, Policy>::result_type cyl_neumann_prime(T1 v, T2 x, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T1, T2, Policy>::result_type cyl_neumann_prime(T1 v, T2 x, const Policy& pol);

   template <class T1, class T2>
-   typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type cyl_neumann(T1 v, T2 x);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type cyl_neumann(T1 v, T2 x);
   template <class T1, class T2>
-   typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type cyl_neumann_prime(T1 v, T2 x);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type cyl_neumann_prime(T1 v, T2 x);

   template <class T, class Policy>
-   typename detail::bessel_traits<T, T, Policy>::result_type sph_neumann(unsigned v, T x, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T, T, Policy>::result_type sph_neumann(unsigned v, T x, const Policy& pol);
   template <class T, class Policy>
-   typename detail::bessel_traits<T, T, Policy>::result_type sph_neumann_prime(unsigned v, T x, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T, T, Policy>::result_type sph_neumann_prime(unsigned v, T x, const Policy& pol);

   template <class T>
-   typename detail::bessel_traits<T, T, policies::policy<> >::result_type sph_neumann(unsigned v, T x);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T, T, policies::policy<> >::result_type sph_neumann(unsigned v, T x);
   template <class T>
-   typename detail::bessel_traits<T, T, policies::policy<> >::result_type sph_neumann_prime(unsigned v, T x);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T, T, policies::policy<> >::result_type sph_neumann_prime(unsigned v, T x);

   template <class T, class Policy>
-   typename detail::bessel_traits<T, T, Policy>::result_type cyl_bessel_j_zero(T v, int m, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T, T, Policy>::result_type cyl_bessel_j_zero(T v, int m, const Policy& pol);

   template <class T>
-   typename detail::bessel_traits<T, T, policies::policy<> >::result_type cyl_bessel_j_zero(T v, int m);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T, T, policies::policy<> >::result_type cyl_bessel_j_zero(T v, int m);

   template <class T, class OutputIterator>
-   OutputIterator cyl_bessel_j_zero(T v,
+   BOOST_MATH_GPU_ENABLED OutputIterator cyl_bessel_j_zero(T v,
                          int start_index,
                          unsigned number_of_zeros,
                          OutputIterator out_it);

   template <class T, class OutputIterator, class Policy>
-   OutputIterator cyl_bessel_j_zero(T v,
+   BOOST_MATH_GPU_ENABLED OutputIterator cyl_bessel_j_zero(T v,
                          int start_index,
                          unsigned number_of_zeros,
                          OutputIterator out_it,
                          const Policy&);

   template <class T, class Policy>
-   typename detail::bessel_traits<T, T, Policy>::result_type cyl_neumann_zero(T v, int m, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T, T, Policy>::result_type cyl_neumann_zero(T v, int m, const Policy& pol);

   template <class T>
-   typename detail::bessel_traits<T, T, policies::policy<> >::result_type cyl_neumann_zero(T v, int m);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T, T, policies::policy<> >::result_type cyl_neumann_zero(T v, int m);
   template <class T, class OutputIterator>
-   OutputIterator cyl_neumann_zero(T v,
+   BOOST_MATH_GPU_ENABLED OutputIterator cyl_neumann_zero(T v,
                          int start_index,
                          unsigned number_of_zeros,
                          OutputIterator out_it);

   template <class T, class OutputIterator, class Policy>
-   OutputIterator cyl_neumann_zero(T v,
+   BOOST_MATH_GPU_ENABLED OutputIterator cyl_neumann_zero(T v,
                          int start_index,
                          unsigned number_of_zeros,
                          OutputIterator out_it,
                          const Policy&);

   template <class T1, class T2>
-   std::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> cyl_hankel_1(T1 v, T2 x);
+   BOOST_MATH_GPU_ENABLED boost::math::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> cyl_hankel_1(T1 v, T2 x);
   template <class T1, class T2, class Policy>
-   std::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_1(T1 v, T2 x, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED boost::math::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_1(T1 v, T2 x, const Policy& pol);

   template <class T1, class T2, class Policy>
-   std::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_2(T1 v, T2 x, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED boost::math::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_2(T1 v, T2 x, const Policy& pol);
   template <class T1, class T2>
-   std::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> cyl_hankel_2(T1 v, T2 x);
+   BOOST_MATH_GPU_ENABLED boost::math::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> cyl_hankel_2(T1 v, T2 x);

   template <class T1, class T2, class Policy>
-   std::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_1(T1 v, T2 x, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED boost::math::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_1(T1 v, T2 x, const Policy& pol);
   template <class T1, class T2>
-   std::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> sph_hankel_1(T1 v, T2 x);
+   BOOST_MATH_GPU_ENABLED boost::math::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> sph_hankel_1(T1 v, T2 x);

   template <class T1, class T2, class Policy>
-   std::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_2(T1 v, T2 x, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED boost::math::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_2(T1 v, T2 x, const Policy& pol);
   template <class T1, class T2>
-   std::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> sph_hankel_2(T1 v, T2 x);
+   BOOST_MATH_GPU_ENABLED boost::math::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> sph_hankel_2(T1 v, T2 x);

   template <class T, class Policy>
-   tools::promote_args_t<T> airy_ai(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> airy_ai(T x, const Policy&);
   template <class T>
-   tools::promote_args_t<T> airy_ai(T x);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> airy_ai(T x);

   template <class T, class Policy>
-   tools::promote_args_t<T> airy_bi(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> airy_bi(T x, const Policy&);
   template <class T>
-   tools::promote_args_t<T> airy_bi(T x);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> airy_bi(T x);

   template <class T, class Policy>
-   tools::promote_args_t<T> airy_ai_prime(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> airy_ai_prime(T x, const Policy&);
   template <class T>
-   tools::promote_args_t<T> airy_ai_prime(T x);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> airy_ai_prime(T x);

   template <class T, class Policy>
-   tools::promote_args_t<T> airy_bi_prime(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> airy_bi_prime(T x, const Policy&);
   template <class T>
-   tools::promote_args_t<T> airy_bi_prime(T x);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> airy_bi_prime(T x);

   template <class T>
-   T airy_ai_zero(int m);
+   BOOST_MATH_GPU_ENABLED T airy_ai_zero(int m);
   template <class T, class Policy>
-   T airy_ai_zero(int m, const Policy&);
+   BOOST_MATH_GPU_ENABLED T airy_ai_zero(int m, const Policy&);

   template <class OutputIterator>
-   OutputIterator airy_ai_zero(
+   BOOST_MATH_GPU_ENABLED OutputIterator airy_ai_zero(
                     int start_index,
                     unsigned number_of_zeros,
                     OutputIterator out_it);
   template <class OutputIterator, class Policy>
-   OutputIterator airy_ai_zero(
+   BOOST_MATH_GPU_ENABLED OutputIterator airy_ai_zero(
                     int start_index,
                     unsigned number_of_zeros,
                     OutputIterator out_it,
                     const Policy&);

   template <class T>
-   T airy_bi_zero(int m);
+   BOOST_MATH_GPU_ENABLED T airy_bi_zero(int m);
   template <class T, class Policy>
-   T airy_bi_zero(int m, const Policy&);
+   BOOST_MATH_GPU_ENABLED T airy_bi_zero(int m, const Policy&);

   template <class OutputIterator>
-   OutputIterator airy_bi_zero(
+   BOOST_MATH_GPU_ENABLED OutputIterator airy_bi_zero(
                     int start_index,
                     unsigned number_of_zeros,
                     OutputIterator out_it);
   template <class OutputIterator, class Policy>
-   OutputIterator airy_bi_zero(
+   BOOST_MATH_GPU_ENABLED OutputIterator airy_bi_zero(
                     int start_index,
                     unsigned number_of_zeros,
                     OutputIterator out_it,
                     const Policy&);

   template <class T, class Policy>
-   tools::promote_args_t<T> sin_pi(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> sin_pi(T x, const Policy&);
   template <class T>
-   tools::promote_args_t<T> sin_pi(T x);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> sin_pi(T x);

   template <class T, class Policy>
-   tools::promote_args_t<T> cos_pi(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> cos_pi(T x, const Policy&);
   template <class T>
-   tools::promote_args_t<T> cos_pi(T x);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> cos_pi(T x);

   template <class T>
-   int fpclassify BOOST_NO_MACRO_EXPAND(T t);
+   BOOST_MATH_GPU_ENABLED int fpclassify BOOST_NO_MACRO_EXPAND(T t);
   template <class T>
-   bool isfinite BOOST_NO_MACRO_EXPAND(T z);
+   BOOST_MATH_GPU_ENABLED bool isfinite BOOST_NO_MACRO_EXPAND(T z);
   template <class T>
-   bool isinf BOOST_NO_MACRO_EXPAND(T t);
+   BOOST_MATH_GPU_ENABLED bool isinf BOOST_NO_MACRO_EXPAND(T t);
   template <class T>
-   bool isnan BOOST_NO_MACRO_EXPAND(T t);
+   BOOST_MATH_GPU_ENABLED bool isnan BOOST_NO_MACRO_EXPAND(T t);
   template <class T>
-   bool isnormal BOOST_NO_MACRO_EXPAND(T t);
+   BOOST_MATH_GPU_ENABLED bool isnormal BOOST_NO_MACRO_EXPAND(T t);

   template <class T>
-   int signbit BOOST_NO_MACRO_EXPAND(T x);
+   BOOST_MATH_GPU_ENABLED int signbit BOOST_NO_MACRO_EXPAND(T x);
   template <class T>
-   int sign BOOST_NO_MACRO_EXPAND(const T& z);
+   BOOST_MATH_GPU_ENABLED int sign BOOST_NO_MACRO_EXPAND(const T& z);
   template <class T, class U>
-   typename tools::promote_args_permissive<T, U>::type copysign BOOST_NO_MACRO_EXPAND(const T& x, const U& y);
+   BOOST_MATH_GPU_ENABLED typename tools::promote_args_permissive<T, U>::type
+      copysign BOOST_NO_MACRO_EXPAND(const T& x, const U& y);
   template <class T>
-   typename tools::promote_args_permissive<T>::type changesign BOOST_NO_MACRO_EXPAND(const T& z);
+   BOOST_MATH_GPU_ENABLED typename tools::promote_args_permissive<T>::type
+      changesign BOOST_NO_MACRO_EXPAND(const T& z);

   // Exponential integrals:
   namespace detail{

@@ -900,7 +983,7 @@ template <class T, class U>
   struct expint_result
   {
-      typedef typename std::conditional<
+      typedef typename boost::math::conditional<
         policies::is_policy<U>::value,
         tools::promote_args_t<T>,
         typename tools::promote_args<T, U>::type
@@ -910,13 +993,13 @@ namespace boost
   } // namespace detail

   template <class T, class Policy>
-   tools::promote_args_t<T> expint(unsigned n, T z, const Policy&);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> expint(unsigned n, T z, const Policy&);

   template <class T, class U>
-   typename detail::expint_result<T, U>::type expint(T const z, U const u);
+   BOOST_MATH_GPU_ENABLED typename detail::expint_result<T, U>::type expint(T const z, U const u);

   template <class T>
-   tools::promote_args_t<T> expint(T z);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> expint(T z);

   // Zeta:
   template <class T, class Policy>
@@ -1087,10 +1170,10 @@ namespace boost

   // pow:
   template <int N, typename T, class Policy>
-   BOOST_MATH_CXX14_CONSTEXPR tools::promote_args_t<T> pow(T base, const Policy& policy);
+   BOOST_MATH_GPU_ENABLED BOOST_MATH_CXX14_CONSTEXPR tools::promote_args_t<T> pow(T base, const Policy& policy);

   template <int N, typename T>
-   BOOST_MATH_CXX14_CONSTEXPR tools::promote_args_t<T> pow(T base);
+   BOOST_MATH_GPU_ENABLED BOOST_MATH_CXX14_CONSTEXPR tools::promote_args_t<T> pow(T base);

   // next:
   template <class T, class U, class Policy>
@@ -1191,13 +1274,13 @@ namespace boost

 #define BOOST_MATH_DETAIL_LL_FUNC(Policy)\
   \
   template <class T>\
-   inline T modf(const T& v, long long* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long long* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
   \
   template <class T>\
-   inline long long lltrunc(const T& v){ using boost::math::lltrunc; return lltrunc(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline long long lltrunc(const T& v){ using boost::math::lltrunc; return lltrunc(v, Policy()); }\
   \
   template <class T>\
-   inline long long llround(const T& v){ using boost::math::llround; return llround(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline long long llround(const T& v){ using boost::math::llround; return llround(v, Policy()); }\

 #  define BOOST_MATH_DETAIL_11_FUNC(Policy)\
   template <class T, class U, class V>\
@@ -1210,74 +1293,74 @@ namespace boost
    BOOST_MATH_DETAIL_11_FUNC(Policy)\
   \
   template <class RT1, class RT2>\
-   inline boost::math::tools::promote_args_t<RT1, RT2> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2> \
   beta(RT1 a, RT2 b) { return ::boost::math::beta(a, b, Policy()); }\
   \
   template <class RT1, class RT2, class A>\
-   inline boost::math::tools::promote_args_t<RT1, RT2, A> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2, A> \
   beta(RT1 a, RT2 b, A x){ return ::boost::math::beta(a, b, x, Policy()); }\
   \
   template <class RT1, class RT2, class RT3>\
-   inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
   betac(RT1 a, RT2 b, RT3 x) { return ::boost::math::betac(a, b, x, Policy()); }\
   \
   template <class RT1, class RT2, class RT3>\
-   inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
   ibeta(RT1 a, RT2 b, RT3 x){ return ::boost::math::ibeta(a, b, x, Policy()); }\
   \
   template <class RT1, class RT2, class RT3>\
-   inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
   ibetac(RT1 a, RT2 b, RT3 x){ return ::boost::math::ibetac(a, b, x, Policy()); }\
   \
   template <class T1, class T2, class T3, class T4>\
-   inline boost::math::tools::promote_args_t<T1, T2, T3, T4> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2, T3, T4> \
   ibeta_inv(T1 a, T2 b, T3 p, T4* py){ return ::boost::math::ibeta_inv(a, b, p, py, Policy()); }\
   \
   template <class RT1, class RT2, class RT3>\
-   inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
   ibeta_inv(RT1 a, RT2 b, RT3 p){ return ::boost::math::ibeta_inv(a, b, p, Policy()); }\
   \
   template <class T1, class T2, class T3, class T4>\
-   inline boost::math::tools::promote_args_t<T1, T2, T3, T4> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2, T3, T4> \
   ibetac_inv(T1 a, T2 b, T3 q, T4* py){ return ::boost::math::ibetac_inv(a, b, q, py, Policy()); }\
   \
   template <class RT1, class RT2, class RT3>\
-   inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
   ibeta_inva(RT1 a, RT2 b, RT3 p){ return ::boost::math::ibeta_inva(a, b, p, Policy()); }\
   \
   template <class T1, class T2, class T3>\
-   inline boost::math::tools::promote_args_t<T1, T2, T3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2, T3> \
   ibetac_inva(T1 a, T2 b, T3 q){ return ::boost::math::ibetac_inva(a, b, q, Policy()); }\
   \
   template <class RT1, class RT2, class RT3>\
-   inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
   ibeta_invb(RT1 a, RT2 b, RT3 p){ return ::boost::math::ibeta_invb(a, b, p, Policy()); }\
   \
   template <class T1, class T2, class T3>\
-   inline boost::math::tools::promote_args_t<T1, T2, T3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2, T3> \
   ibetac_invb(T1 a, T2 b, T3 q){ return ::boost::math::ibetac_invb(a, b, q, Policy()); }\
   \
   template <class RT1, class RT2, class RT3>\
-   inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
   ibetac_inv(RT1 a, RT2 b, RT3 q){ return ::boost::math::ibetac_inv(a, b, q, Policy()); }\
   \
   template <class RT1, class RT2, class RT3>\
-   inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
   ibeta_derivative(RT1 a, RT2 b, RT3 x){ return ::boost::math::ibeta_derivative(a, b, x, Policy()); }\
   \
-   template <class T> T binomial_coefficient(unsigned n, unsigned k){ return ::boost::math::binomial_coefficient<T>(n, k, Policy()); }\
+   template <class T> BOOST_MATH_GPU_ENABLED T binomial_coefficient(unsigned n, unsigned k){ return ::boost::math::binomial_coefficient<T>(n, k, Policy()); }\
   \
   template <class RT>\
-   inline boost::math::tools::promote_args_t<RT> erf(RT z) { return ::boost::math::erf(z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT> erf(RT z) { return ::boost::math::erf(z, Policy()); }\
   \
   template <class RT>\
-   inline boost::math::tools::promote_args_t<RT> erfc(RT z){ return ::boost::math::erfc(z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT> erfc(RT z){ return ::boost::math::erfc(z, Policy()); }\
   \
   template <class RT>\
-   inline boost::math::tools::promote_args_t<RT> erf_inv(RT z) { return ::boost::math::erf_inv(z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT> erf_inv(RT z) { return ::boost::math::erf_inv(z, Policy()); }\
   \
   template <class RT>\
-   inline boost::math::tools::promote_args_t<RT> erfc_inv(RT z){ return ::boost::math::erfc_inv(z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT> erfc_inv(RT z){ return ::boost::math::erfc_inv(z, Policy()); }\
   \
   using boost::math::legendre_next;\
   \
@@ -1310,7 +1393,7 @@ namespace boost
   laguerre(unsigned n, T1 m, T2 x) { return ::boost::math::laguerre(n, m, x, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> \
   hermite(unsigned n, T x){ return ::boost::math::hermite(n, x, Policy()); }\
   \
   using boost::math::hermite_next;\
@@ -1345,145 +1428,145 @@ namespace boost
   spherical_harmonic_i(unsigned n, int m, T1 theta, T2 phi, const Policy& pol);\
   \
   template <class T1, class T2, class T3>\
-   inline boost::math::tools::promote_args_t<T1, T2, T3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2, T3> \
   ellint_rf(T1 x, T2 y, T3 z){ return ::boost::math::ellint_rf(x, y, z, Policy()); }\
   \
   template <class T1, class T2, class T3>\
-   inline boost::math::tools::promote_args_t<T1, T2, T3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2, T3> \
   ellint_rd(T1 x, T2 y, T3 z){ return ::boost::math::ellint_rd(x, y, z, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> \
   ellint_rc(T1 x, T2 y){ return ::boost::math::ellint_rc(x, y, Policy()); }\
   \
   template <class T1, class T2, class T3, class T4>\
-   inline boost::math::tools::promote_args_t<T1, T2, T3, T4> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2, T3, T4> \
   ellint_rj(T1 x, T2 y, T3 z, T4 p){ return boost::math::ellint_rj(x, y, z, p, Policy()); }\
   \
   template <class T1, class T2, class T3>\
-   inline boost::math::tools::promote_args_t<T1, T2, T3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2, T3> \
   ellint_rg(T1 x, T2 y, T3 z){ return ::boost::math::ellint_rg(x, y, z, Policy()); }\
   \
   template <typename T>\
-   inline boost::math::tools::promote_args_t<T> ellint_2(T k){ return boost::math::ellint_2(k, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> ellint_2(T k){ return boost::math::ellint_2(k, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> ellint_2(T1 k, T2 phi){ return boost::math::ellint_2(k, phi, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> ellint_2(T1 k, T2 phi){ return boost::math::ellint_2(k, phi, Policy()); }\
   \
   template <typename T>\
-   inline boost::math::tools::promote_args_t<T> ellint_d(T k){ return boost::math::ellint_d(k, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> ellint_d(T k){ return boost::math::ellint_d(k, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> ellint_d(T1 k, T2 phi){ return boost::math::ellint_d(k, phi, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> ellint_d(T1 k, T2 phi){ return boost::math::ellint_d(k, phi, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> jacobi_zeta(T1 k, T2 phi){ return boost::math::jacobi_zeta(k, phi, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> jacobi_zeta(T1 k, T2 phi){ return boost::math::jacobi_zeta(k, phi, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> heuman_lambda(T1 k, T2 phi){ return boost::math::heuman_lambda(k, phi, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> heuman_lambda(T1 k, T2 phi){ return boost::math::heuman_lambda(k, phi, Policy()); }\
   \
   template <typename T>\
-   inline boost::math::tools::promote_args_t<T> ellint_1(T k){ return boost::math::ellint_1(k, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> ellint_1(T k){ return boost::math::ellint_1(k, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> ellint_1(T1 k, T2 phi){ return boost::math::ellint_1(k, phi, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> ellint_1(T1 k, T2 phi){ return boost::math::ellint_1(k, phi, Policy()); }\
   \
   template <class T1, class T2, class T3>\
-   inline boost::math::tools::promote_args_t<T1, T2, T3> ellint_3(T1 k, T2 v, T3 phi){ return boost::math::ellint_3(k, v, phi, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2, T3> ellint_3(T1 k, T2 v, T3 phi){ return boost::math::ellint_3(k, v, phi, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> ellint_3(T1 k, T2 v){ return boost::math::ellint_3(k, v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> ellint_3(T1 k, T2 v){ return boost::math::ellint_3(k, v, Policy()); }\
   \
   using boost::math::max_factorial;\
   template <class RT>\
-   inline RT factorial(unsigned int i) { return boost::math::factorial<RT>(i, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline RT factorial(unsigned int i) { return boost::math::factorial<RT>(i, Policy()); }\
   using boost::math::unchecked_factorial;\
   template <class RT>\
-   inline RT double_factorial(unsigned i){ return boost::math::double_factorial<RT>(i, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline RT double_factorial(unsigned i){ return boost::math::double_factorial<RT>(i, Policy()); }\
   template <class RT>\
   inline boost::math::tools::promote_args_t<RT> falling_factorial(RT x, unsigned n){ return boost::math::falling_factorial(x, n, Policy()); }\
   template <class RT>\
   inline boost::math::tools::promote_args_t<RT> rising_factorial(RT x, unsigned n){ return boost::math::rising_factorial(x, n, Policy()); }\
   \
   template <class RT>\
-   inline boost::math::tools::promote_args_t<RT> tgamma(RT z){ return boost::math::tgamma(z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT> tgamma(RT z){ return boost::math::tgamma(z, Policy()); }\
   \
   template <class RT>\
-   inline boost::math::tools::promote_args_t<RT> tgamma1pm1(RT z){ return boost::math::tgamma1pm1(z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT> tgamma1pm1(RT z){ return boost::math::tgamma1pm1(z, Policy()); }\
   \
   template <class RT1, class RT2>\
-   inline boost::math::tools::promote_args_t<RT1, RT2> tgamma(RT1 a, RT2 z){ return boost::math::tgamma(a, z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2> tgamma(RT1 a, RT2 z){ return boost::math::tgamma(a, z, Policy()); }\
   \
   template <class RT>\
-   inline boost::math::tools::promote_args_t<RT> lgamma(RT z, int* sign){ return boost::math::lgamma(z, sign, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT> lgamma(RT z, int* sign){ return boost::math::lgamma(z, sign, Policy()); }\
   \
   template <class RT>\
-   inline boost::math::tools::promote_args_t<RT> lgamma(RT x){ return boost::math::lgamma(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT> lgamma(RT x){ return boost::math::lgamma(x, Policy()); }\
   \
   template <class RT1, class RT2>\
-   inline boost::math::tools::promote_args_t<RT1, RT2> tgamma_lower(RT1 a, RT2 z){ return boost::math::tgamma_lower(a, z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2> tgamma_lower(RT1 a, RT2 z){ return boost::math::tgamma_lower(a, z, Policy()); }\
   \
   template <class RT1, class RT2>\
-   inline boost::math::tools::promote_args_t<RT1, RT2> gamma_q(RT1 a, RT2 z){ return boost::math::gamma_q(a, z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2> gamma_q(RT1 a, RT2 z){ return boost::math::gamma_q(a, z, Policy()); }\
   \
   template <class RT1, class RT2>\
-   inline boost::math::tools::promote_args_t<RT1, RT2> gamma_p(RT1 a, RT2 z){ return boost::math::gamma_p(a, z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2> gamma_p(RT1 a, RT2 z){ return boost::math::gamma_p(a, z, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> tgamma_delta_ratio(T1 z, T2 delta){ return boost::math::tgamma_delta_ratio(z, delta, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> tgamma_delta_ratio(T1 z, T2 delta){ return boost::math::tgamma_delta_ratio(z, delta, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> tgamma_ratio(T1 a, T2 b) { return boost::math::tgamma_ratio(a, b, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> tgamma_ratio(T1 a, T2 b) { return boost::math::tgamma_ratio(a, b, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> gamma_p_derivative(T1 a, T2 x){ return boost::math::gamma_p_derivative(a, x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> gamma_p_derivative(T1 a, T2 x){ return boost::math::gamma_p_derivative(a, x, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> gamma_p_inv(T1 a, T2 p){ return boost::math::gamma_p_inv(a, p, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> gamma_p_inv(T1 a, T2 p){ return boost::math::gamma_p_inv(a, p, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> gamma_p_inva(T1 a, T2 p){ return boost::math::gamma_p_inva(a, p, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> gamma_p_inva(T1 a, T2 p){ return boost::math::gamma_p_inva(a, p, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> gamma_q_inv(T1 a, T2 q){ return boost::math::gamma_q_inv(a, q, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> gamma_q_inv(T1 a, T2 q){ return boost::math::gamma_q_inv(a, q, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> gamma_q_inva(T1 a, T2 q){ return boost::math::gamma_q_inva(a, q, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> gamma_q_inva(T1 a, T2 q){ return boost::math::gamma_q_inva(a, q, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> digamma(T x){ return boost::math::digamma(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> digamma(T x){ return boost::math::digamma(x, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> trigamma(T x){ return boost::math::trigamma(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> trigamma(T x){ return boost::math::trigamma(x, Policy()); }\
   \
   template <class T>\
   inline boost::math::tools::promote_args_t<T> polygamma(int n, T x){ return boost::math::polygamma(n, x, Policy()); }\
   \
   template <class T1, class T2>\
   inline boost::math::tools::promote_args_t<T1, T2> \
-   hypot(T1 x, T2 y){ return boost::math::hypot(x, y, Policy()); }\
+   BOOST_MATH_GPU_ENABLED hypot(T1 x, T2 y){ return boost::math::hypot(x, y, Policy()); }\
   \
   template <class RT>\
   inline boost::math::tools::promote_args_t<RT> cbrt(RT z){ return boost::math::cbrt(z, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> log1p(T x){ return boost::math::log1p(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> log1p(T x){ return boost::math::log1p(x, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> log1pmx(T x){ return boost::math::log1pmx(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> log1pmx(T x){ return boost::math::log1pmx(x, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> expm1(T x){ return boost::math::expm1(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> expm1(T x){ return boost::math::expm1(x, Policy()); }\
   \
   template <class T1, class T2>\
   inline boost::math::tools::promote_args_t<T1, T2> \
-   powm1(const T1 a, const T2 z){ return boost::math::powm1(a, z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED powm1(const T1 a, const T2 z){ return boost::math::powm1(a, z, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> sqrt1pm1(const T& val){ return boost::math::sqrt1pm1(val, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> sqrt1pm1(const T& val){ return boost::math::sqrt1pm1(val, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> sinc_pi(T x){ return boost::math::sinc_pi(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> sinc_pi(T x){ return boost::math::sinc_pi(x, Policy()); }\
   \
   template <class T>\
   inline boost::math::tools::promote_args_t<T> sinhc_pi(T x){ return boost::math::sinhc_pi(x, Policy()); }\
@@ -1495,7 +1578,7 @@ namespace boost
   inline boost::math::tools::promote_args_t<T> acosh(const T x){ return boost::math::acosh(x, Policy()); }\
   \
   template<typename T>\
-   inline boost::math::tools::promote_args_t<T> atanh(const T x){ return boost::math::atanh(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> atanh(const T x){ return boost::math::atanh(x, Policy()); }\
   \
   template <class T1, class T2>\
   inline typename boost::math::detail::bessel_traits<T1, T2, Policy>::result_type cyl_bessel_j(T1 v, T2 x)\
@@ -1568,10 +1651,10 @@ template <class T, class OutputIterator>\
   { boost::math::cyl_neumann_zero(v, start_index, number_of_zeros, out_it, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> sin_pi(T x){ return boost::math::sin_pi(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> sin_pi(T x){ return boost::math::sin_pi(x, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> cos_pi(T x){ return boost::math::cos_pi(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> cos_pi(T x){ return boost::math::cos_pi(x, Policy()); }\
   \
   using boost::math::fpclassify;\
   using boost::math::isfinite;\
@@ -1584,44 +1667,44 @@ template <class OutputIterator, class Policy>\
   using boost::math::changesign;\
   \
   template <class T, class U>\
-   inline typename boost::math::tools::promote_args_t<T, U> expint(T const& z, U const& u)\
+   BOOST_MATH_GPU_ENABLED inline typename boost::math::tools::promote_args_t<T, U> expint(T const& z, U const& u)\
   { return boost::math::expint(z, u, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> expint(T z){ return boost::math::expint(z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> expint(T z){ return boost::math::expint(z, Policy()); }\
   \
   template <class T>\
   inline boost::math::tools::promote_args_t<T> zeta(T s){ return boost::math::zeta(s, Policy()); }\
   \
   template <class T>\
-   inline T round(const T& v){ using boost::math::round; return round(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline T round(const T& v){ using boost::math::round; return round(v, Policy()); }\
   \
   template <class T>\
-   inline int iround(const T& v){ using boost::math::iround; return iround(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline int iround(const T& v){ using boost::math::iround; return iround(v, Policy()); }\
   \
   template <class T>\
-   inline long lround(const T& v){ using boost::math::lround; return lround(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline long lround(const T& v){ using boost::math::lround; return lround(v, Policy()); }\
   \
   template <class T>\
-   inline T trunc(const T& v){ using boost::math::trunc; return trunc(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline T trunc(const T& v){ using boost::math::trunc; return trunc(v, Policy()); }\
   \
   template <class T>\
-   inline int itrunc(const T& v){ using boost::math::itrunc; return itrunc(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline int itrunc(const T& v){ using boost::math::itrunc; return itrunc(v, Policy()); }\
   \
   template <class T>\
-   inline long ltrunc(const T& v){ using boost::math::ltrunc; return ltrunc(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline long ltrunc(const T& v){ using boost::math::ltrunc; return ltrunc(v, Policy()); }\
   \
   template <class T>\
-   inline T modf(const T& v, T* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline T modf(const T& v, T* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
   \
   template <class T>\
-   inline T modf(const T& v, int* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline T modf(const T& v, int* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
   \
   template <class T>\
-   inline T modf(const T& v, long* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
   \
   template <int N, class T>\
-   inline boost::math::tools::promote_args_t<T> pow(T v){ return boost::math::pow<N>(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> pow(T v){ return boost::math::pow<N>(v, Policy()); }\
   \
   template <class T> T nextafter(const T& a, const T& b){ return static_cast<T>(boost::math::nextafter(a, b, Policy())); }\
   template <class T> T float_next(const T& a){ return static_cast<T>(boost::math::float_next(a, Policy())); }\
@@ -1633,19 +1716,19 @@ template <class RT1, class RT2>\
   inline boost::math::tools::promote_args_t<RT1, RT2> owens_t(RT1 a, RT2 z){ return boost::math::owens_t(a, z, Policy()); }\
   \
   template <class T1, class T2>\
-   inline std::complex<typename boost::math::detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_1(T1 v, T2 x)\
+   inline BOOST_MATH_GPU_ENABLED boost::math::complex<typename boost::math::detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_1(T1 v, T2 x)\
   { return boost::math::cyl_hankel_1(v, x, Policy()); }\
   \
   template <class T1, class T2>\
-   inline std::complex<typename boost::math::detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_2(T1 v, T2 x)\
+   inline BOOST_MATH_GPU_ENABLED boost::math::complex<typename boost::math::detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_2(T1 v, T2 x)\
   { return boost::math::cyl_hankel_2(v, x, Policy()); }\
   \
   template <class T1, class T2>\
-   inline std::complex<typename boost::math::detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_1(T1 v, T2 x)\
+   inline BOOST_MATH_GPU_ENABLED boost::math::complex<typename boost::math::detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_1(T1 v, T2 x)\
   { return boost::math::sph_hankel_1(v, x, Policy()); }\
   \
   template <class T1, class T2>\
-   inline std::complex<typename boost::math::detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_2(T1 v, T2 x)\
+   inline BOOST_MATH_GPU_ENABLED boost::math::complex<typename boost::math::detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_2(T1 v, T2 x)\
   { return boost::math::sph_hankel_2(v, x, Policy()); }\
   \
   template <class T>\
@@ -1749,33 +1832,33 @@ template <class T>\
   { return boost::math::jacobi_theta4m1tau(z, q, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> airy_ai(T x)\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> airy_ai(T x)\
   { return boost::math::airy_ai(x, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> airy_bi(T x)\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> airy_bi(T x)\
   { return boost::math::airy_bi(x, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> airy_ai_prime(T x)\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> airy_ai_prime(T x)\
   { return boost::math::airy_ai_prime(x, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> airy_bi_prime(T x)\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> airy_bi_prime(T x)\
   { return boost::math::airy_bi_prime(x, Policy()); }\
   \
   template <class T>\
-   inline T airy_ai_zero(int m)\
+   BOOST_MATH_GPU_ENABLED inline T airy_ai_zero(int m)\
   { return boost::math::airy_ai_zero<T>(m, Policy()); }\
   template <class OutputIterator>\
-   OutputIterator airy_ai_zero(int start_index, unsigned number_of_zeros, OutputIterator out_it)\
+   BOOST_MATH_GPU_ENABLED OutputIterator airy_ai_zero(int start_index, unsigned number_of_zeros, OutputIterator out_it)\
   { return boost::math::airy_ai_zero(start_index, number_of_zeros, out_it, Policy()); }\
   \
   template <class T>\
-   inline T airy_bi_zero(int m)\
+   BOOST_MATH_GPU_ENABLED inline T airy_bi_zero(int m)\
   { return boost::math::airy_bi_zero<T>(m, Policy()); }\
   template <class OutputIterator>\
-   OutputIterator airy_bi_zero(int start_index, unsigned number_of_zeros, OutputIterator out_it)\
+   BOOST_MATH_GPU_ENABLED OutputIterator airy_bi_zero(int start_index, unsigned number_of_zeros, OutputIterator out_it)\
   { return boost::math::airy_bi_zero(start_index, number_of_zeros, out_it, Policy()); }\
   \
   template <class T>\
@@ -1813,6 +1896,6 @@ template <class T, class Policy>\
-
+#endif // BOOST_MATH_HAS_NVRTC

 #endif // BOOST_MATH_SPECIAL_MATH_FWD_HPP
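For orientation, a minimal sketch (not part of the patch) of what the BOOST_MATH_GPU_ENABLED forward declarations above buy you: the same boost::math call compiles for host and device. The kernel name and launch shape are illustrative only, and assume a CMake build configured with BOOST_MATH_ENABLE_CUDA:

    #include <boost/math/special_functions/expm1.hpp>

    // Each thread evaluates expm1 on one element; the GPU-enabled
    // declarations above are what allow this to compile as device code.
    __global__ void expm1_kernel(const double* in, double* out, int n)
    {
        const int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
        {
            out[i] = boost::math::expm1(in[i]);
        }
    }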
diff --git a/include/boost/math/special_functions/modf.hpp b/include/boost/math/special_functions/modf.hpp
index 75e6be9f46..6e372ec9a3 100644
--- a/include/boost/math/special_functions/modf.hpp
+++ b/include/boost/math/special_functions/modf.hpp
@@ -1,4 +1,5 @@
 //  Copyright John Maddock 2007.
+//  Copyright Matt Borland 2024.
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -10,56 +11,60 @@
 #pragma once
 #endif

-#include <boost/math/special_functions/math_fwd.hpp>
 #include <boost/math/special_functions/trunc.hpp>
 #include <boost/math/special_functions/round.hpp>
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_HAS_NVRTC
+#include <boost/math/special_functions/math_fwd.hpp>
+#endif

 namespace boost{ namespace math{

 template <class T, class Policy>
-inline T modf(const T& v, T* ipart, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, T* ipart, const Policy& pol)
 {
    *ipart = trunc(v, pol);
    return v - *ipart;
 }
 template <class T>
-inline T modf(const T& v, T* ipart)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, T* ipart)
 {
    return modf(v, ipart, policies::policy<>());
 }

 template <class T, class Policy>
-inline T modf(const T& v, int* ipart, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, int* ipart, const Policy& pol)
 {
    *ipart = itrunc(v, pol);
    return v - *ipart;
 }
 template <class T>
-inline T modf(const T& v, int* ipart)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, int* ipart)
 {
    return modf(v, ipart, policies::policy<>());
 }

 template <class T, class Policy>
-inline T modf(const T& v, long* ipart, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long* ipart, const Policy& pol)
 {
    *ipart = ltrunc(v, pol);
    return v - *ipart;
 }
 template <class T>
-inline T modf(const T& v, long* ipart)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long* ipart)
 {
    return modf(v, ipart, policies::policy<>());
 }

 template <class T, class Policy>
-inline T modf(const T& v, long long* ipart, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long long* ipart, const Policy& pol)
 {
    *ipart = lltrunc(v, pol);
    return v - *ipart;
 }
 template <class T>
-inline T modf(const T& v, long long* ipart)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long long* ipart)
 {
    return modf(v, ipart, policies::policy<>());
 }
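A quick host-side sketch (illustrative, not part of the patch) of the overload set touched above: the integer part can be captured at four widths, each routed through the matching trunc variant:

    #include <boost/math/special_functions/modf.hpp>
    #include <iostream>

    int main()
    {
        double ip = 0;                                   // modf(v, T*) uses trunc
        const double frac = boost::math::modf(3.75, &ip);
        long long llip = 0;                              // modf(v, long long*) uses lltrunc
        const double frac2 = boost::math::modf(-3.75, &llip);
        std::cout << ip << ' ' << frac << ' '            // 3 0.75
                  << llip << ' ' << frac2 << '\n';       // -3 -0.75
    }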
diff --git a/include/boost/math/special_functions/next.hpp b/include/boost/math/special_functions/next.hpp
index 02a208e4eb..fd08162f98 100644
--- a/include/boost/math/special_functions/next.hpp
+++ b/include/boost/math/special_functions/next.hpp
@@ -10,6 +10,11 @@
 #pragma once
 #endif

+#include <boost/math/tools/config.hpp>
+
+// TODO(mborland): Need to remove recurrsion from these algos
+#ifndef BOOST_MATH_HAS_NVRTC
+
 #include <boost/math/special_functions/math_fwd.hpp>
 #include <boost/math/policies/error_handling.hpp>
 #include <boost/math/special_functions/fpclassify.hpp>
@@ -920,4 +925,6 @@ inline typename tools::promote_args<T>::type float_advance(const T& val, int distance, const Policy& pol)

 }} // boost math namespaces

+#endif
+
 #endif // BOOST_MATH_SPECIAL_NEXT_HPP
diff --git a/include/boost/math/special_functions/pow.hpp b/include/boost/math/special_functions/pow.hpp
index 9c64889977..7a1bb14eba 100644
--- a/include/boost/math/special_functions/pow.hpp
+++ b/include/boost/math/special_functions/pow.hpp
@@ -2,6 +2,7 @@
 //   Computes a power with exponent known at compile-time
 //  (C) Copyright Bruno Lalande 2008.
+//  (C) Copyright Matt Borland 2024.
 //  Distributed under the Boost Software License, Version 1.0.
 //  (See accompanying file LICENSE_1_0.txt or copy at
 //  http://www.boost.org/LICENSE_1_0.txt)
@@ -12,12 +13,14 @@
 #ifndef BOOST_MATH_POW_HPP
 #define BOOST_MATH_POW_HPP

-
-#include <boost/math/special_functions/math_fwd.hpp>
+#include <boost/math/tools/config.hpp>
 #include <boost/math/policies/policy.hpp>
 #include <boost/math/policies/error_handling.hpp>
 #include <boost/math/tools/promotion.hpp>

+#ifndef BOOST_MATH_HAS_NVRTC
+#include <boost/math/special_functions/math_fwd.hpp>
+#endif

 namespace boost {
 namespace math {
@@ -34,7 +37,7 @@ template <int N, int M = N % 2>
 struct positive_power
 {
     template <typename T>
-    static BOOST_MATH_CXX14_CONSTEXPR T result(T base)
+    BOOST_MATH_GPU_ENABLED static constexpr T result(T base)
     {
         T power = positive_power<N / 2>::result(base);
         return power * power;
@@ -45,7 +48,7 @@ template <int N>
 struct positive_power<N, 1>
 {
     template <typename T>
-    static BOOST_MATH_CXX14_CONSTEXPR T result(T base)
+    BOOST_MATH_GPU_ENABLED static constexpr T result(T base)
     {
         T power = positive_power<N / 2>::result(base);
         return base * power * power;
@@ -56,7 +59,7 @@ template <>
 struct positive_power<1, 1>
 {
     template <typename T>
-    static BOOST_MATH_CXX14_CONSTEXPR T result(T base){ return base; }
+    BOOST_MATH_GPU_ENABLED static constexpr T result(T base){ return base; }
 };

@@ -64,7 +67,7 @@ template <int N, bool>
 struct power_if_positive
 {
     template <typename T, class Policy>
-    static BOOST_MATH_CXX14_CONSTEXPR T result(T base, const Policy&)
+    BOOST_MATH_GPU_ENABLED static constexpr T result(T base, const Policy&)
     { return positive_power<N>::result(base); }
 };
@@ -72,7 +75,7 @@ template <int N>
 struct power_if_positive<N, false>
 {
     template <typename T, class Policy>
-    static BOOST_MATH_CXX14_CONSTEXPR T result(T base, const Policy& policy)
+    BOOST_MATH_GPU_ENABLED static constexpr T result(T base, const Policy& policy)
     {
         if (base == 0)
         {
@@ -91,7 +94,7 @@ template <>
 struct power_if_positive<0, true>
 {
     template <typename T, class Policy>
-    static BOOST_MATH_CXX14_CONSTEXPR T result(T base, const Policy& policy)
+    BOOST_MATH_GPU_ENABLED static constexpr T result(T base, const Policy& policy)
     {
         if (base == 0)
         {
@@ -120,14 +123,14 @@ struct select_power_if_positive

 template <int N, typename T, class Policy>
-BOOST_MATH_CXX14_CONSTEXPR inline typename tools::promote_args<T>::type pow(T base, const Policy& policy)
+BOOST_MATH_GPU_ENABLED constexpr inline typename tools::promote_args<T>::type pow(T base, const Policy& policy)
 {
    using result_type = typename tools::promote_args<T>::type;
    return detail::select_power_if_positive<N>::type::result(static_cast<result_type>(base), policy);
 }

 template <int N, typename T>
-BOOST_MATH_CXX14_CONSTEXPR inline typename tools::promote_args<T>::type pow(T base)
+BOOST_MATH_GPU_ENABLED constexpr inline typename tools::promote_args<T>::type pow(T base)
 { return pow<N>(base, policies::policy<>()); }

 #ifdef _MSC_VER
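The pow changes above swap BOOST_MATH_CXX14_CONSTEXPR for plain constexpr alongside the GPU marker, so the compile-time-exponent pow stays usable in constant expressions. A small illustrative sketch (not from the patch):

    #include <boost/math/special_functions/pow.hpp>

    int main()
    {
        // The exponent is a template parameter, so positive_power<N>
        // unrolls the multiplication chain at compile time.
        constexpr double r = boost::math::pow<8>(2.0);
        static_assert(r == 256.0, "evaluated in a constant expression");
    }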
diff --git a/include/boost/math/special_functions/powm1.hpp b/include/boost/math/special_functions/powm1.hpp
index e52277b16d..80d02dc299 100644
--- a/include/boost/math/special_functions/powm1.hpp
+++ b/include/boost/math/special_functions/powm1.hpp
@@ -1,4 +1,5 @@
 //  (C) Copyright John Maddock 2006.
+//  (C) Copyright Matt Borland 2024.
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -12,6 +13,7 @@
 #pragma warning(disable:4702) // Unreachable code (release mode only warning)
 #endif

+#include <boost/math/tools/config.hpp>
 #include <boost/math/special_functions/log1p.hpp>
 #include <boost/math/special_functions/expm1.hpp>
 #include <boost/math/special_functions/trunc.hpp>
@@ -22,32 +24,23 @@ namespace boost{ namespace math{ namespace detail{

 template <class T, class Policy>
-inline T powm1_imp(const T x, const T y, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T powm1_imp(const T x, const T y, const Policy& pol)
 {
    BOOST_MATH_STD_USING

-   static const char* function = "boost::math::powm1<%1%>(%1%, %1%)";
-   if (x > 0)
+   constexpr auto function = "boost::math::powm1<%1%>(%1%, %1%)";
+
+   if ((fabs(y * (x - 1)) < T(0.5)) || (fabs(y) < T(0.2)))
    {
-      if ((fabs(y * (x - 1)) < T(0.5)) || (fabs(y) < T(0.2)))
-      {
-         // We don't have any good/quick approximation for log(x) * y
-         // so just try it and see:
-         T l = y * log(x);
-         if (l < T(0.5))
-            return boost::math::expm1(l, pol);
-         if (l > boost::math::tools::log_max_value<T>())
-            return boost::math::policies::raise_overflow_error<T>(function, nullptr, pol);
-         // fall through....
-      }
-   }
-   else if ((boost::math::signbit)(x)) // Need to error check -0 here as well
-   {
-      // y had better be an integer:
-      if (boost::math::trunc(y) != y)
-         return boost::math::policies::raise_domain_error<T>(function, "For non-integral exponent, expected base > 0 but got %1%", x, pol);
-      if (boost::math::trunc(y / 2) == y / 2)
-         return powm1_imp(T(-x), y, pol);
+      // We don't have any good/quick approximation for log(x) * y
+      // so just try it and see:
+      T l = y * log(x);
+      if (l < T(0.5))
+         return boost::math::expm1(l, pol);
+      if (l > boost::math::tools::log_max_value<T>())
+         return boost::math::policies::raise_overflow_error<T>(function, nullptr, pol);
+      // fall through....
    }
+
   T result = pow(x, y) - 1;
   if((boost::math::isinf)(result))
      return result < 0 ? -boost::math::policies::raise_overflow_error<T>(function, nullptr, pol) : boost::math::policies::raise_overflow_error<T>(function, nullptr, pol);
@@ -56,22 +49,41 @@ inline T powm1_imp(const T x, const T y, const Policy& pol)
   return result;
 }

+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED inline T powm1_imp_dispatch(const T x, const T y, const Policy& pol)
+{
+   BOOST_MATH_STD_USING
+
+   if ((boost::math::signbit)(x)) // Need to error check -0 here as well
+   {
+      constexpr auto function = "boost::math::powm1<%1%>(%1%, %1%)";
+
+      // y had better be an integer:
+      if (boost::math::trunc(y) != y)
+         return boost::math::policies::raise_domain_error<T>(function, "For non-integral exponent, expected base > 0 but got %1%", x, pol);
+      if (boost::math::trunc(y / 2) == y / 2)
+         return powm1_imp(T(-x), T(y), pol);
+   }
+
+   return powm1_imp(T(x), T(y), pol);
+}
+
 } // detail

 template <class T1, class T2>
-inline typename tools::promote_args<T1, T2>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T1, T2>::type
    powm1(const T1 a, const T2 z)
 {
    typedef typename tools::promote_args<T1, T2>::type result_type;
-   return detail::powm1_imp(static_cast<result_type>(a), static_cast<result_type>(z), policies::policy<>());
+   return detail::powm1_imp_dispatch(static_cast<result_type>(a), static_cast<result_type>(z), policies::policy<>());
 }

 template <class T1, class T2, class Policy>
-inline typename tools::promote_args<T1, T2>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T1, T2>::type
    powm1(const T1 a, const T2 z, const Policy& pol)
 {
    typedef typename tools::promote_args<T1, T2>::type result_type;
-   return detail::powm1_imp(static_cast<result_type>(a), static_cast<result_type>(z), pol);
+   return detail::powm1_imp_dispatch(static_cast<result_type>(a), static_cast<result_type>(z), pol);
 }

 } // namespace math
diff --git a/include/boost/math/special_functions/round.hpp b/include/boost/math/special_functions/round.hpp
index e74acba85b..bb99da7e31 100644
--- a/include/boost/math/special_functions/round.hpp
+++ b/include/boost/math/special_functions/round.hpp
@@ -12,6 +12,9 @@
 #endif

 #include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_HAS_NVRTC
+
 #include <boost/math/special_functions/math_fwd.hpp>
 #include <boost/math/policies/error_handling.hpp>
 #include <boost/math/special_functions/fpclassify.hpp>
@@ -30,7 +33,7 @@ namespace boost{ namespace math{ namespace detail{

 template <class T, class Policy>
-inline tools::promote_args_t<T> round(const T& v, const Policy& pol, const std::false_type&)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> round(const T& v, const Policy& pol, const std::false_type&)
 {
    BOOST_MATH_STD_USING
    using result_type = tools::promote_args_t<T>;
@@ -65,7 +68,7 @@ inline tools::promote_args_t<T> round(const T& v, const Policy& pol, const std::
    }
 }
 template <class T, class Policy>
-inline tools::promote_args_t<T> round(const T& v, const Policy&, const std::true_type&)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> round(const T& v, const Policy&, const std::true_type&)
 {
    return v;
 }
@@ -73,12 +76,12 @@
 } // namespace detail

 template <class T, class Policy>
-inline tools::promote_args_t<T> round(const T& v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> round(const T& v, const Policy& pol)
 {
    return detail::round(v, pol, std::integral_constant<bool, detail::is_integer_for_rounding<T>::value>());
 }
 template <class T>
-inline tools::promote_args_t<T> round(const T& v)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> round(const T& v)
 {
    return round(v, policies::policy<>());
 }
@@ -103,7 +106,7 @@ inline int iround(const T& v, const Policy& pol)

    result_type r = boost::math::round(v, pol);

-   #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP
+   #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
    if constexpr (std::is_arithmetic_v<T>
    #ifdef BOOST_MATH_FLOAT128_TYPE
                  && !std::is_same_v<T, BOOST_MATH_FLOAT128_TYPE>
@@ -127,7 +130,7 @@
       }
    }
    #else
-   static const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<int>::digits);
+   BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<int>::digits);

    if (r >= max_val || r < -max_val)
    {
@@ -138,20 +141,20 @@
    return static_cast<int>(r);
 }
 template <class T>
-inline int iround(const T& v)
+BOOST_MATH_GPU_ENABLED inline int iround(const T& v)
 {
    return iround(v, policies::policy<>());
 }

 template <class T, class Policy>
-inline long lround(const T& v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline long lround(const T& v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    using result_type = tools::promote_args_t<T>;

    result_type r = boost::math::round(v, pol);

-   #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP
+   #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
    if constexpr (std::is_arithmetic_v<T>
    #ifdef BOOST_MATH_FLOAT128_TYPE
                  && !std::is_same_v<T, BOOST_MATH_FLOAT128_TYPE>
@@ -175,7 +178,7 @@
       }
    }
    #else
-   static const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long>::digits);
+   BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long>::digits);

    if (r >= max_val || r < -max_val)
    {
@@ -186,20 +189,20 @@
    return static_cast<long>(r);
 }
 template <class T>
-inline long lround(const T& v)
+BOOST_MATH_GPU_ENABLED inline long lround(const T& v)
 {
    return lround(v, policies::policy<>());
 }

 template <class T, class Policy>
-inline long long llround(const T& v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline long long llround(const T& v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    using result_type = boost::math::tools::promote_args_t<T>;

    result_type r = boost::math::round(v, pol);

-   #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP
+   #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
    if constexpr (std::is_arithmetic_v<T>
    #ifdef BOOST_MATH_FLOAT128_TYPE
                  && !std::is_same_v<T, BOOST_MATH_FLOAT128_TYPE>
@@ -223,7 +226,7 @@
       }
    }
    #else
-   static const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long long>::digits);
+   BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long long>::digits);

    if (r >= max_val || r < -max_val)
    {
@@ -234,11 +237,117 @@
    return static_cast<long long>(r);
 }
 template <class T>
-inline long long llround(const T& v)
+BOOST_MATH_GPU_ENABLED inline long long llround(const T& v)
 {
    return llround(v, policies::policy<>());
 }

 }} // namespaces

+#else // Specialized NVRTC overloads
+
+namespace boost {
+namespace math {
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED T round(T x)
+{
+   return ::round(x);
+}
+
+template <>
+BOOST_MATH_GPU_ENABLED float round(float x)
+{
+   return ::roundf(x);
+}
+
+template <typename T, typename Policy>
+BOOST_MATH_GPU_ENABLED T round(T x, const Policy&)
+{
+   return ::round(x);
+}
+
+template <typename Policy>
+BOOST_MATH_GPU_ENABLED float round(float x, const Policy&)
+{
+   return ::roundf(x);
+}
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED int iround(T x)
+{
+   return static_cast<int>(::lround(x));
+}
+
+template <>
+BOOST_MATH_GPU_ENABLED int iround(float x)
+{
+   return static_cast<int>(::lroundf(x));
+}
+
+template <typename T, typename Policy>
+BOOST_MATH_GPU_ENABLED int iround(T x, const Policy&)
+{
+   return static_cast<int>(::lround(x));
+}
+
+template <typename Policy>
+BOOST_MATH_GPU_ENABLED int iround(float x, const Policy&)
+{
+   return static_cast<int>(::lroundf(x));
+}
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED long lround(T x)
+{
+   return ::lround(x);
+}
+
+template <>
+BOOST_MATH_GPU_ENABLED long lround(float x)
+{
+   return ::lroundf(x);
+}
+
+template <typename T, typename Policy>
+BOOST_MATH_GPU_ENABLED long lround(T x, const Policy&)
+{
+   return ::lround(x);
+}
+
+template <typename Policy>
+BOOST_MATH_GPU_ENABLED long lround(float x, const Policy&)
+{
+   return ::lroundf(x);
+}
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED long long llround(T x)
+{
+   return ::llround(x);
+}
+
+template <>
+BOOST_MATH_GPU_ENABLED long long llround(float x)
+{
+   return ::llroundf(x);
+}
+
+template <typename T, typename Policy>
+BOOST_MATH_GPU_ENABLED long long llround(T x, const Policy&)
+{
+   return ::llround(x);
+}
+
+template <typename Policy>
+BOOST_MATH_GPU_ENABLED long long llround(float x, const Policy&)
+{
+   return ::llroundf(x);
+}
+
+} // Namespace math
+} // Namespace boost
+
+#endif // BOOST_MATH_HAS_NVRTC

 #endif // BOOST_MATH_ROUND_HPP
diff --git a/include/boost/math/special_functions/sign.hpp b/include/boost/math/special_functions/sign.hpp
index 8f9fc4793a..4f76522654 100644
--- a/include/boost/math/special_functions/sign.hpp
+++ b/include/boost/math/special_functions/sign.hpp
@@ -1,6 +1,7 @@
 //  (C) Copyright John Maddock 2006.
 //  (C) Copyright Johan Rade 2006.
 //  (C) Copyright Paul A. Bristow 2011 (added changesign).
+//  (C) Copyright Matt Borland 2024
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
@@ -13,6 +14,8 @@
 #pragma once
 #endif

+#ifndef __CUDACC_RTC__
+
 #include <boost/math/tools/config.hpp>
 #include <boost/math/special_functions/math_fwd.hpp>
 #include <boost/math/special_functions/detail/fp_traits.hpp>
@@ -25,9 +28,10 @@ namespace detail {

 #ifdef BOOST_MATH_USE_STD_FPCLASSIFY
    template<class T>
-   inline int signbit_impl(T x, native_tag const&)
+   BOOST_MATH_GPU_ENABLED inline int signbit_impl(T x, native_tag const&)
    {
-      return (std::signbit)(x) ? 1 : 0;
+      using std::signbit;
+      return (signbit)(x) ? 1 : 0;
    }
 #endif

@@ -35,13 +39,13 @@
    // signed zero or NaN.

    template<class T>
-   inline int signbit_impl(T x, generic_tag<true> const&)
+   BOOST_MATH_GPU_ENABLED inline int signbit_impl(T x, generic_tag<true> const&)
    {
       return x < 0;
    }

    template<class T>
-   inline int signbit_impl(T x, generic_tag<false> const&)
+   BOOST_MATH_GPU_ENABLED inline int signbit_impl(T x, generic_tag<false> const&)
    {
       return x < 0;
    }
@@ -65,7 +69,7 @@
 #endif

    template<class T>
-   inline int signbit_impl(T x, ieee_copy_all_bits_tag const&)
+   BOOST_MATH_GPU_ENABLED inline int signbit_impl(T x, ieee_copy_all_bits_tag const&)
    {
       typedef typename fp_traits<T>::type traits;

@@ -75,7 +79,7 @@
    }

    template<class T>
-   inline int signbit_impl(T x, ieee_copy_leading_bits_tag const&)
+   BOOST_MATH_GPU_ENABLED inline int signbit_impl(T x, ieee_copy_leading_bits_tag const&)
    {
       typedef typename fp_traits<T>::type traits;

@@ -91,13 +95,13 @@
    // signed zero or NaN.

    template<class T>
-   inline T (changesign_impl)(T x, generic_tag<true> const&)
+   BOOST_MATH_GPU_ENABLED inline T (changesign_impl)(T x, generic_tag<true> const&)
    {
       return -x;
    }

    template<class T>
-   inline T (changesign_impl)(T x, generic_tag<false> const&)
+   BOOST_MATH_GPU_ENABLED inline T (changesign_impl)(T x, generic_tag<false> const&)
    {
       return -x;
    }
@@ -124,7 +128,7 @@
 #endif

    template<class T>
-   inline T changesign_impl(T x, ieee_copy_all_bits_tag const&)
+   BOOST_MATH_GPU_ENABLED inline T changesign_impl(T x, ieee_copy_all_bits_tag const&)
    {
       typedef typename fp_traits<T>::sign_change_type traits;

@@ -136,7 +140,7 @@
    }

    template<class T>
-   inline T (changesign_impl)(T x, ieee_copy_leading_bits_tag const&)
+   BOOST_MATH_GPU_ENABLED inline T (changesign_impl)(T x, ieee_copy_leading_bits_tag const&)
    {
       typedef typename fp_traits<T>::sign_change_type traits;

@@ -150,7 +154,8 @@
 } // namespace detail

-template <class T> int (signbit)(T x)
+template <class T>
+BOOST_MATH_GPU_ENABLED int (signbit)(T x)
 {
    typedef typename detail::fp_traits<T>::type traits;
    typedef typename traits::method method;
@@ -160,12 +165,13 @@ template <class T> int (signbit)(T x)
 }

 template <class T>
-inline int sign BOOST_NO_MACRO_EXPAND(const T& z)
+BOOST_MATH_GPU_ENABLED inline int sign BOOST_NO_MACRO_EXPAND(const T& z)
 {
    return (z == 0) ? 0 : (boost::math::signbit)(z) ? -1 : 1;
 }

-template <class T> typename tools::promote_args_permissive<T>::type (changesign)(const T& x)
+template <class T>
+BOOST_MATH_GPU_ENABLED typename tools::promote_args_permissive<T>::type (changesign)(const T& x)
 { //!< \brief return unchanged binary pattern of x, except for change of sign bit.
    typedef typename detail::fp_traits<T>::sign_change_type traits;
    typedef typename traits::method method;
@@ -176,7 +182,7 @@
 }

 template <class T, class U>
-inline typename tools::promote_args_permissive<T, U>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args_permissive<T, U>::type
    copysign BOOST_NO_MACRO_EXPAND(const T& x, const U& y)
 {
    BOOST_MATH_STD_USING
@@ -188,6 +194,47 @@

 } // namespace math
 } // namespace boost

+#else // NVRTC alias versions
+
+#include <boost/math/tools/config.hpp>
+
+namespace boost {
+namespace math {
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED int signbit(T x)
+{
+   return ::signbit(x);
+}
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED T changesign(T x)
+{
+   return -x;
+}
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED T copysign(T x, T y)
+{
+   return ::copysign(x, y);
+}
+
+template <>
+BOOST_MATH_GPU_ENABLED float copysign(float x, float y)
+{
+   return ::copysignf(x, y);
+}
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED T sign(T z)
+{
+   return (z == 0) ? 0 : ::signbit(z) ? -1 : 1;
+}
+
+} // namespace math
+} // namespace boost
+
+#endif // __CUDACC_RTC__

 #endif // BOOST_MATH_TOOLS_SIGN_HPP
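For reference, the semantics both branches above preserve: sign() is ternary, while signbit() reports only the sign bit and so distinguishes -0.0. A host-side sketch (illustrative, not from the patch):

    #include <boost/math/special_functions/sign.hpp>
    #include <iostream>

    int main()
    {
        std::cout << boost::math::sign(-3.5) << '\n';           // -1
        std::cout << boost::math::sign(0.0) << '\n';            //  0
        std::cout << boost::math::signbit(-0.0) << '\n';        // non-zero: sign bit set
        std::cout << boost::math::copysign(2.0, -1.0) << '\n';  // -2
    }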
diff --git a/include/boost/math/special_functions/sin_pi.hpp b/include/boost/math/special_functions/sin_pi.hpp
index 5b8eb6fcf2..e59e232e6d 100644
--- a/include/boost/math/special_functions/sin_pi.hpp
+++ b/include/boost/math/special_functions/sin_pi.hpp
@@ -1,4 +1,5 @@
 //  Copyright (c) 2007 John Maddock
+//  Copyright (c) 2024 Matt Borland
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -10,9 +11,14 @@
 #pragma once
 #endif

+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_HAS_NVRTC
+
 #include <boost/math/special_functions/math_fwd.hpp>
 #include <boost/math/special_functions/trunc.hpp>
-#include <boost/math/tools/config.hpp>
+#include <boost/math/tools/numeric_limits.hpp>
+#include <boost/math/tools/promotion.hpp>
 #include <boost/math/constants/constants.hpp>
 #include <boost/math/special_functions/fpclassify.hpp>
 #include <boost/math/policies/error_handling.hpp>
@@ -21,11 +27,9 @@ namespace boost{ namespace math{ namespace detail{

 template <class T, class Policy>
-inline T sin_pi_imp(T x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T sin_pi_imp(T x, const Policy&)
 {
    BOOST_MATH_STD_USING // ADL of std names
-   if(x < 0)
-      return -sin_pi_imp(T(-x), pol);
    // sin of pi*x:
    if(x < T(0.5))
       return sin(constants::pi<T>() * x);
@@ -39,7 +43,7 @@ inline T sin_pi_imp(T x, const Policy& pol)
    invert = false;

    T rem = floor(x);
-   if(abs(floor(rem/2)*2 - rem) > std::numeric_limits<T>::epsilon())
+   if(abs(floor(rem/2)*2 - rem) > boost::math::numeric_limits<T>::epsilon())
    {
       invert = !invert;
    }
@@ -53,10 +57,23 @@ inline T sin_pi_imp(T x, const Policy& pol)
    return invert ? T(-rem) : rem;
 }

+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED inline T sin_pi_dispatch(T x, const Policy& pol)
+{
+   if (x < T(0))
+   {
+      return -sin_pi_imp(T(-x), pol);
+   }
+   else
+   {
+      return sin_pi_imp(T(x), pol);
+   }
+}
+
 } // namespace detail

 template <class T, class Policy>
-inline typename tools::promote_args<T>::type sin_pi(T x, const Policy&)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type sin_pi(T x, const Policy&)
 {
    typedef typename tools::promote_args<T>::type result_type;
    typedef typename policies::evaluation<result_type, Policy>::type value_type;
@@ -69,7 +86,7 @@ inline typename tools::promote_args<T>::type sin_pi(T x, const Policy&)
    // We want to ignore overflows since the result is in [-1,1] and the
    // check slows the code down considerably.
       policies::overflow_error<policies::ignore_error> >::type forwarding_policy;
-   return policies::checked_narrowing_cast<result_type, forwarding_policy>(boost::math::detail::sin_pi_imp<value_type>(x, forwarding_policy()), "sin_pi");
+   return policies::checked_narrowing_cast<result_type, forwarding_policy>(boost::math::detail::sin_pi_dispatch<value_type>(x, forwarding_policy()), "sin_pi");
 }

 template <class T>
@@ -80,5 +97,40 @@ inline typename tools::promote_args<T>::type sin_pi(T x)

 } // namespace math
 } // namespace boost

+
+#else // Special handling for NVRTC
+
+namespace boost {
+namespace math {
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED auto sin_pi(T x)
+{
+   return ::sinpi(x);
+}
+
+template <>
+BOOST_MATH_GPU_ENABLED auto sin_pi(float x)
+{
+   return ::sinpif(x);
+}
+
+template <typename T, typename Policy>
+BOOST_MATH_GPU_ENABLED auto sin_pi(T x, const Policy&)
+{
+   return ::sinpi(x);
+}
+
+template <typename Policy>
+BOOST_MATH_GPU_ENABLED auto sin_pi(float x, const Policy&)
+{
+   return ::sinpif(x);
+}
+
+} // namespace math
+} // namespace boost
+
+#endif // BOOST_MATH_HAS_NVRTC

 #endif
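sin_pi(x) evaluates sin(pi*x) with the argument reduction performed on x itself, which is also why the NVRTC branch can map straight onto ::sinpi/::sinpif. A host-side sketch of the accuracy motivation (illustrative, not from the patch):

    #include <boost/math/special_functions/sin_pi.hpp>
    #include <boost/math/constants/constants.hpp>
    #include <cmath>
    #include <iostream>

    int main()
    {
        // Reduction on x keeps integer arguments exact...
        std::cout << boost::math::sin_pi(1e15) << '\n';  // exactly 0
        // ...whereas forming pi*x first rounds before sin() ever runs.
        std::cout << std::sin(boost::math::constants::pi<double>() * 1e15) << '\n';
    }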
 template <class T>
-inline T sinc_pi_imp(const T x)
+BOOST_MATH_GPU_ENABLED inline T sinc_pi_imp(const T x)
 {
    BOOST_MATH_STD_USING
@@ -44,7 +44,7 @@ namespace boost
    {
       return 0;
    }
-   else if (abs(x) >= 3.3 * tools::forth_root_epsilon<T>())
+   else if (abs(x) >= T(3.3) * tools::forth_root_epsilon<T>())
    {
       return(sin(x)/x);
    }
@@ -58,24 +58,23 @@ namespace boost
 } // namespace detail

 template <class T>
-inline typename tools::promote_args<T>::type sinc_pi(T x)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type sinc_pi(T x)
 {
    typedef typename tools::promote_args<T>::type result_type;
    return detail::sinc_pi_imp(static_cast<result_type>(x));
 }

 template <class T, class Policy>
-inline typename tools::promote_args<T>::type sinc_pi(T x, const Policy&)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type sinc_pi(T x, const Policy&)
 {
    typedef typename tools::promote_args<T>::type result_type;
    return detail::sinc_pi_imp(static_cast<result_type>(x));
 }

 template <class T, template<typename> class U>
-inline U<T> sinc_pi(const U<T> x)
+BOOST_MATH_GPU_ENABLED inline U<T> sinc_pi(const U<T> x)
 {
    BOOST_MATH_STD_USING
-   using ::std::numeric_limits;

    T const    taylor_0_bound = tools::epsilon<T>();
    T const    taylor_2_bound = tools::root_epsilon<T>();
@@ -88,11 +87,11 @@ namespace boost
    else
    {
       // approximation by taylor series in x at 0 up to order 0
-#ifdef __MWERKS__
+      #ifdef __MWERKS__
       U<T> result = static_cast<U<T> >(1);
-#else
+      #else
       U<T> result = U<T>(1);
-#endif
+      #endif

       if (abs(x) >= taylor_0_bound)
       {
@@ -113,7 +112,7 @@ namespace boost
    }

    template <class T, template<typename> class U, class Policy>
-   inline U<T> sinc_pi(const U<T> x, const Policy&)
+   BOOST_MATH_GPU_ENABLED inline U<T> sinc_pi(const U<T> x, const Policy&)
    {
       return sinc_pi(x);
    }
diff --git a/include/boost/math/special_functions/sqrt1pm1.hpp b/include/boost/math/special_functions/sqrt1pm1.hpp
index 041916a53f..4d8aeb38cf 100644
--- a/include/boost/math/special_functions/sqrt1pm1.hpp
+++ b/include/boost/math/special_functions/sqrt1pm1.hpp
@@ -10,6 +10,7 @@
 #pragma once
 #endif

+#include <boost/math/tools/config.hpp>
 #include
 #include
 #include
@@ -21,7 +22,7 @@
 namespace boost{ namespace math{

 template <class T, class Policy>
-inline typename tools::promote_args<T>::type sqrt1pm1(const T& val, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type sqrt1pm1(const T& val, const Policy& pol)
 {
    typedef typename tools::promote_args<T>::type result_type;
    BOOST_MATH_STD_USING
@@ -32,7 +33,7 @@ inline typename tools::promote_args<T>::type sqrt1pm1(const T& val, const Policy& pol)
 }

 template <class T>
-inline typename tools::promote_args<T>::type sqrt1pm1(const T& val)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type sqrt1pm1(const T& val)
 {
    return sqrt1pm1(val, policies::policy<>());
 }
diff --git a/include/boost/math/special_functions/trigamma.hpp b/include/boost/math/special_functions/trigamma.hpp
index f74b43db1f..61a60b502f 100644
--- a/include/boost/math/special_functions/trigamma.hpp
+++ b/include/boost/math/special_functions/trigamma.hpp
@@ -1,4 +1,5 @@
 //  (C) Copyright John Maddock 2006.
+//  (C) Copyright Matt Borland 2024.
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0.
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,14 +11,22 @@ #pragma once #endif -#include +#include #include -#include #include +#include +#include +#include #include #include -#include +#include +#include + +#ifndef BOOST_MATH_HAS_NVRTC +#include #include +#include +#endif #if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128) // @@ -33,15 +42,24 @@ namespace boost{ namespace math{ namespace detail{ +// TODO(mborland): Temporary for NVRTC +#ifndef BOOST_MATH_HAS_NVRTC template T polygamma_imp(const int n, T x, const Policy &pol); template -T trigamma_prec(T x, const std::integral_constant*, const Policy&) +T trigamma_prec(T x, const Policy& pol, const boost::math::integral_constant&) +{ + return polygamma_imp(1, x, pol); +} +#endif + +template +BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const Policy&, const boost::math::integral_constant&) { // Max error in interpolated form: 3.736e-017 - static const T offset = BOOST_MATH_BIG_CONSTANT(T, 53, 2.1093254089355469); - static const T P_1_2[] = { + BOOST_MATH_STATIC const T offset = BOOST_MATH_BIG_CONSTANT(T, 53, 2.1093254089355469); + BOOST_MATH_STATIC const T P_1_2[] = { BOOST_MATH_BIG_CONSTANT(T, 53, -1.1093280605946045), BOOST_MATH_BIG_CONSTANT(T, 53, -3.8310674472619321), BOOST_MATH_BIG_CONSTANT(T, 53, -3.3703848401898283), @@ -49,7 +67,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) BOOST_MATH_BIG_CONSTANT(T, 53, 1.6638069578676164), BOOST_MATH_BIG_CONSTANT(T, 53, 0.64468386819102836), }; - static const T Q_1_2[] = { + BOOST_MATH_STATIC const T Q_1_2[] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, 3.4535389668541151), BOOST_MATH_BIG_CONSTANT(T, 53, 4.5208926987851437), @@ -58,7 +76,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) BOOST_MATH_BIG_CONSTANT(T, 53, -0.20314516859987728e-6), }; // Max error in interpolated form: 1.159e-017 - static const T P_2_4[] = { + BOOST_MATH_STATIC const T P_2_4[] = { BOOST_MATH_BIG_CONSTANT(T, 53, -0.13803835004508849e-7), BOOST_MATH_BIG_CONSTANT(T, 53, 0.50000049158540261), BOOST_MATH_BIG_CONSTANT(T, 53, 1.6077979838469348), @@ -66,7 +84,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) BOOST_MATH_BIG_CONSTANT(T, 53, 2.0534873203680393), BOOST_MATH_BIG_CONSTANT(T, 53, 0.74566981111565923), }; - static const T Q_2_4[] = { + BOOST_MATH_STATIC const T Q_2_4[] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, 2.8822787662376169), BOOST_MATH_BIG_CONSTANT(T, 53, 4.1681660554090917), @@ -77,7 +95,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) // Maximum Deviation Found: 6.896e-018 // Expected Error Term : -6.895e-018 // Maximum Relative Change in Control Points : 8.497e-004 - static const T P_4_inf[] = { + BOOST_MATH_STATIC const T P_4_inf[] = { static_cast(0.68947581948701249e-17L), static_cast(0.49999999999998975L), static_cast(1.0177274392923795L), @@ -86,7 +104,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) static_cast(1.5897035272532764L), static_cast(0.40154388356961734L), }; - static const T Q_4_inf[] = { + BOOST_MATH_STATIC const T Q_4_inf[] = { static_cast(1.0L), static_cast(1.7021215452463932L), static_cast(4.4290431747556469L), @@ -110,11 +128,11 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) } template -T trigamma_prec(T x, const std::integral_constant*, const Policy&) +BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const Policy&, const 
boost::math::integral_constant&) { // Max error in interpolated form: 1.178e-020 - static const T offset_1_2 = BOOST_MATH_BIG_CONSTANT(T, 64, 2.109325408935546875); - static const T P_1_2[] = { + BOOST_MATH_STATIC const T offset_1_2 = BOOST_MATH_BIG_CONSTANT(T, 64, 2.109325408935546875); + BOOST_MATH_STATIC const T P_1_2[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -1.10932535608960258341), BOOST_MATH_BIG_CONSTANT(T, 64, -4.18793841543017129052), BOOST_MATH_BIG_CONSTANT(T, 64, -4.63865531898487734531), @@ -123,7 +141,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) BOOST_MATH_BIG_CONSTANT(T, 64, 1.21172611429185622377), BOOST_MATH_BIG_CONSTANT(T, 64, 0.259635673503366427284), }; - static const T Q_1_2[] = { + BOOST_MATH_STATIC const T Q_1_2[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 3.77521119359546982995), BOOST_MATH_BIG_CONSTANT(T, 64, 5.664338024578956321), @@ -133,7 +151,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) BOOST_MATH_BIG_CONSTANT(T, 64, 0.629642219810618032207e-8), }; // Max error in interpolated form: 3.912e-020 - static const T P_2_8[] = { + BOOST_MATH_STATIC const T P_2_8[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.387540035162952880976e-11), BOOST_MATH_BIG_CONSTANT(T, 64, 0.500000000276430504), BOOST_MATH_BIG_CONSTANT(T, 64, 3.21926880986360957306), @@ -143,7 +161,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) BOOST_MATH_BIG_CONSTANT(T, 64, 13.4346512182925923978), BOOST_MATH_BIG_CONSTANT(T, 64, 3.98656291026448279118), }; - static const T Q_2_8[] = { + BOOST_MATH_STATIC const T Q_2_8[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 6.10520430478613667724), BOOST_MATH_BIG_CONSTANT(T, 64, 18.475001060603645512), @@ -156,7 +174,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) // Maximum Deviation Found: 2.635e-020 // Expected Error Term : 2.635e-020 // Maximum Relative Change in Control Points : 1.791e-003 - static const T P_8_inf[] = { + BOOST_MATH_STATIC const T P_8_inf[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.263527875092466899848e-19), BOOST_MATH_BIG_CONSTANT(T, 64, 0.500000000000000058145), BOOST_MATH_BIG_CONSTANT(T, 64, 0.0730121433777364138677), @@ -164,7 +182,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) BOOST_MATH_BIG_CONSTANT(T, 64, 0.0517092358874932620529), BOOST_MATH_BIG_CONSTANT(T, 64, 1.07995383547483921121), }; - static const T Q_8_inf[] = { + BOOST_MATH_STATIC const T Q_8_inf[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, -0.187309046577818095504), BOOST_MATH_BIG_CONSTANT(T, 64, 3.95255391645238842975), @@ -188,7 +206,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) } template -T trigamma_prec(T x, const std::integral_constant*, const Policy&) +BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const Policy&, const boost::math::integral_constant&) { // Max error in interpolated form: 1.916e-035 @@ -356,8 +374,8 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) return (1 + tools::evaluate_polynomial(P_16_inf, y) / tools::evaluate_polynomial(Q_16_inf, y)) / x; } -template -T trigamma_imp(T x, const Tag* t, const Policy& pol) +template +BOOST_MATH_GPU_ENABLED T trigamma_dispatch(T x, const Policy& pol, const Tag& tag) { // // This handles reflection of negative arguments, and all our @@ -373,27 +391,29 @@ T trigamma_imp(T x, const Tag* t, const Policy& pol) { // Reflect: T z = 1 - x; + + if(z < 1) + { + result = 1 / (z * z); + z += 1; 
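      // Context for the reflection step that follows: for non-positive x the
      // dispatch applies the identity
      //    trigamma(x) = pi^2 / sin^2(pi * x) - trigamma(1 - x)
      // with z = 1 - x, using sin_pi() so that sin(pi * x) is computed from
      // an accurately reduced argument. The previous implementation recursed
      // into trigamma_imp() at this point; SYCL device code may not recurse
      // (see the note next to BOOST_MATH_GPU_ENABLED in tools/config.hpp), so
      // the shift
      //    trigamma(z) = trigamma(z + 1) + 1 / z^2
      // is applied inline by this z < 1 branch before trigamma_prec() is
      // called on the shifted argument.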
+      }
+
       // Argument reduction for tan:
       if(floor(x) == x)
       {
          return policies::raise_pole_error<T>("boost::math::trigamma<%1%>(%1%)", nullptr, (1-x), pol);
       }
       T s = fabs(x) < fabs(z) ? boost::math::sin_pi(x, pol) : boost::math::sin_pi(z, pol);
-      return -trigamma_imp(z, t, pol) + boost::math::pow<2>(constants::pi<T>()) / (s * s);
+      return result - trigamma_prec(T(z), pol, tag) + boost::math::pow<2>(constants::pi<T>()) / (s * s);
    }
    if(x < 1)
    {
       result = 1 / (x * x);
       x += 1;
    }
-   return result + trigamma_prec(x, t, pol);
+   return result + trigamma_prec(x, pol, tag);
 }

-template <class T, class Policy>
-T trigamma_imp(T x, const std::integral_constant<int, 0>*, const Policy& pol)
-{
-   return polygamma_imp(1, x, pol);
-}
 //
 // Initializer: ensure all our constants are initialized prior to the first call of main:
 //
@@ -402,22 +422,24 @@ struct trigamma_initializer
 {
    struct init
    {
-      init()
+      BOOST_MATH_GPU_ENABLED init()
       {
          typedef typename policies::precision<T, Policy>::type precision_type;
-         do_init(std::integral_constant<bool, precision_type::value && (precision_type::value <= 113)>());
+         do_init(boost::math::integral_constant<bool, precision_type::value && (precision_type::value <= 113)>());
       }
-      void do_init(const std::true_type&)
+      BOOST_MATH_GPU_ENABLED void do_init(const boost::math::true_type&)
       {
          boost::math::trigamma(T(2.5), Policy());
       }
-      void do_init(const std::false_type&){}
-      void force_instantiate()const{}
+      BOOST_MATH_GPU_ENABLED void do_init(const boost::math::false_type&){}
+      BOOST_MATH_GPU_ENABLED void force_instantiate()const{}
    };
    static const init initializer;
-   static void force_instantiate()
+   BOOST_MATH_GPU_ENABLED static void force_instantiate()
    {
+      #ifndef BOOST_MATH_HAS_GPU_SUPPORT
       initializer.force_instantiate();
+      #endif
    }
 };

@@ -427,13 +449,13 @@ const typename trigamma_initializer<T, Policy>::init trigamma_initializer<T, Policy>::initializer;

 template <class T, class Policy>
-inline typename tools::promote_args<T>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
    trigamma(T x, const Policy&)
 {
    typedef typename tools::promote_args<T>::type result_type;
    typedef typename policies::evaluation<result_type, Policy>::type value_type;
    typedef typename policies::precision<T, Policy>::type precision_type;
-   typedef std::integral_constant<int,
+   typedef boost::math::integral_constant<int,
       precision_type::value <= 0 ? 0 :
      precision_type::value <= 53 ? 53 :
      precision_type::value <= 64 ? 64 :
      precision_type::value <= 113 ? 113 : 0
   > tag_type;
   typedef typename policies::normalise<
      Policy,
      policies::promote_float<false>,
      policies::promote_double<false>,
      policies::discrete_quantile<>,
      policies::assert_undefined<> >::type forwarding_policy;
   // Force initialization of constants:
   detail::trigamma_initializer<value_type, forwarding_policy>::force_instantiate();
-   return policies::checked_narrowing_cast<result_type, forwarding_policy>(detail::trigamma_imp(
+   return policies::checked_narrowing_cast<result_type, forwarding_policy>(detail::trigamma_dispatch(
      static_cast<value_type>(x),
-      static_cast<const tag_type*>(nullptr), forwarding_policy()), "boost::math::trigamma<%1%>(%1%)");
+      forwarding_policy(),
+      tag_type()), "boost::math::trigamma<%1%>(%1%)");
 }

 template <class T>
-inline typename tools::promote_args<T>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
    trigamma(T x)
 {
    return trigamma(x, policies::policy<>());
diff --git a/include/boost/math/special_functions/trunc.hpp b/include/boost/math/special_functions/trunc.hpp
index a084de560b..b52f4f321c 100644
--- a/include/boost/math/special_functions/trunc.hpp
+++ b/include/boost/math/special_functions/trunc.hpp
@@ -11,9 +11,14 @@
 #pragma once
 #endif

+#include
+#include
+#include
+
+#ifndef BOOST_MATH_HAS_NVRTC
+
 #include
 #include
-#include
 #include
 #include
 #include
@@ -27,7 +32,7 @@ namespace boost{ namespace math{ namespace detail{

 template <class T, class Policy>
-inline tools::promote_args_t<T> trunc(const T& v, const Policy& pol, const std::false_type&)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> trunc(const T& v, const Policy& pol, const std::false_type&)
 {
    BOOST_MATH_STD_USING
    using result_type = tools::promote_args_t<T>;
@@ -39,23 +44,66 @@ inline tools::promote_args_t<T> trunc(const T& v, const Policy& pol, const std::false_type&)
 }

 template <class T, class Policy>
-inline tools::promote_args_t<T> trunc(const T& v, const Policy&, const std::true_type&)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> trunc(const T& v, const Policy&, const std::true_type&)
 {
    return v;
 }

-}
+} // Namespace detail

 template <class T, class Policy>
-inline tools::promote_args_t<T> trunc(const T& v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> trunc(const T& v, const Policy& pol)
 {
    return detail::trunc(v, pol, std::integral_constant<bool, detail::is_integer_for_rounding<T>::value>());
 }

+
 template <class T>
-inline tools::promote_args_t<T> trunc(const T& v)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> trunc(const T& v)
 {
    return trunc(v, policies::policy<>());
 }
+
+#else // Special handling for nvrtc
+
+namespace boost {
+namespace math {
+
+namespace detail {
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED double trunc_impl(T x)
+{
+   return static_cast<double>(x);
+}
+
+BOOST_MATH_GPU_ENABLED inline float trunc_impl(float x)
+{
+   return ::truncf(x);
+}
+
+BOOST_MATH_GPU_ENABLED inline double trunc_impl(double x)
+{
+   return ::trunc(x);
+}
+
+} // Namespace detail
+
+template <typename T, typename Policy>
+BOOST_MATH_GPU_ENABLED auto trunc(T x, const Policy&)
+{
+   return detail::trunc_impl(x);
+}
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED auto trunc(T x)
+{
+   return detail::trunc_impl(x);
+}
+
+#endif
+
+#ifndef BOOST_MATH_HAS_NVRTC
+
 //
 // The following functions will not compile unless T has an
 // implicit conversion to the integer types.  For user-defined
@@ -70,13 +118,13 @@ inline tools::promote_args_t<T> trunc(const T& v)
 // https://stackoverflow.com/questions/27442885/syntax-error-with-stdnumeric-limitsmax
 //
 template <class T, class Policy>
-inline int itrunc(const T& v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline int itrunc(const T& v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    using result_type = tools::promote_args_t<T>;
    result_type r = boost::math::trunc(v, pol);

-   #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP
+   #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
    if constexpr (std::is_arithmetic_v<result_type>
                  #ifdef BOOST_MATH_FLOAT128_TYPE
                  && !std::is_same_v<BOOST_MATH_FLOAT128_TYPE, result_type>
                  #endif
@@ -100,7 +148,7 @@ inline int itrunc(const T& v, const Policy& pol)
       }
    }
    #else
-   static const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<int>::digits);
+   BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<int>::digits);

    if (r >= max_val || r < -max_val)
    {
@@ -110,20 +158,21 @@ inline int itrunc(const T& v, const Policy& pol)
    return static_cast<int>(r);
 }

+
 template <class T>
-inline int itrunc(const T& v)
+BOOST_MATH_GPU_ENABLED inline int itrunc(const T& v)
 {
    return itrunc(v, policies::policy<>());
 }

 template <class T, class Policy>
-inline long ltrunc(const T& v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline long ltrunc(const T& v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    using result_type = tools::promote_args_t<T>;
    result_type r = boost::math::trunc(v, pol);

-   #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP
+   #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
    if constexpr (std::is_arithmetic_v<result_type>
                  #ifdef BOOST_MATH_FLOAT128_TYPE
                  && !std::is_same_v<BOOST_MATH_FLOAT128_TYPE, result_type>
                  #endif
@@ -147,7 +196,7 @@ inline long ltrunc(const T& v, const Policy& pol)
       }
    }
    #else
-   static const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long>::digits);
+   BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long>::digits);

    if (r >= max_val || r < -max_val)
    {
@@ -157,20 +206,21 @@ inline long ltrunc(const T& v, const Policy& pol)
    return static_cast<long>(r);
 }

+
 template <class T>
-inline long ltrunc(const T& v)
+BOOST_MATH_GPU_ENABLED inline long ltrunc(const T& v)
 {
    return ltrunc(v, policies::policy<>());
 }

 template <class T, class Policy>
-inline long long lltrunc(const T& v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline long long lltrunc(const T& v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    using result_type = tools::promote_args_t<T>;
    result_type r = boost::math::trunc(v, pol);

-   #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP
+   #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
    if constexpr (std::is_arithmetic_v<result_type>
                  #ifdef BOOST_MATH_FLOAT128_TYPE
                  && !std::is_same_v<BOOST_MATH_FLOAT128_TYPE, result_type>
                  #endif
@@ -194,7 +244,7 @@ inline long long lltrunc(const T& v, const Policy& pol)
       }
    }
    #else
-   static const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long long>::digits);
+   BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long long>::digits);

    if (r >= max_val || r < -max_val)
    {
@@ -204,21 +254,81 @@ inline long long lltrunc(const T& v, const Policy& pol)
    return static_cast<long long>(r);
 }

+
 template <class T>
-inline long long lltrunc(const T& v)
+BOOST_MATH_GPU_ENABLED inline long long lltrunc(const T& v)
 {
    return lltrunc(v, policies::policy<>());
 }

+#else // Reduced impl specifically for NVRTC platform
+
+namespace detail {
+
+template <typename TargetType, typename T>
+BOOST_MATH_GPU_ENABLED TargetType integer_trunc_impl(T v)
+{
+   double r = boost::math::trunc(v);
+
+   const double max_val = ldexp(1.0, boost::math::numeric_limits<TargetType>::digits);
+
+   if (r >= max_val || r < -max_val)
+   {
+      r = 0;
+   }
+
+   return static_cast<TargetType>(r);
+}
+
+} // Namespace detail
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED int itrunc(T v)
+{
+   return detail::integer_trunc_impl<int>(v);
+}
+
+template <typename T, typename Policy>
+BOOST_MATH_GPU_ENABLED int itrunc(T v, const Policy&)
+{
+   return detail::integer_trunc_impl<int>(v);
+}
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED long ltrunc(T v)
+{
+   return detail::integer_trunc_impl<long>(v);
+}
+
+template <typename T, typename Policy>
+BOOST_MATH_GPU_ENABLED long ltrunc(T v, const Policy&)
+{
+   return detail::integer_trunc_impl<long>(v);
+}
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED long long lltrunc(T v)
+{
+   return detail::integer_trunc_impl<long long>(v);
+}
+
+template <typename T, typename Policy>
+BOOST_MATH_GPU_ENABLED long long lltrunc(T v, const Policy&)
+{
+   return detail::integer_trunc_impl<long long>(v);
+}
+
+#endif // BOOST_MATH_HAS_NVRTC
+
 template <class T, class Policy>
-inline typename std::enable_if<std::is_convertible<T, int>::value, int>::type
+BOOST_MATH_GPU_ENABLED inline boost::math::enable_if_t<boost::math::is_convertible_v<T, int>, int>
    iconvert(const T& v, const Policy&)
 {
    return static_cast<int>(v);
 }

 template <class T, class Policy>
-inline typename std::enable_if<!std::is_convertible<T, int>::value, int>::type
+BOOST_MATH_GPU_ENABLED inline boost::math::enable_if_t<!boost::math::is_convertible_v<T, int>, int>
    iconvert(const T& v, const Policy& pol)
 {
    using boost::math::itrunc;
@@ -226,14 +336,14 @@ inline typename std::enable_if<!std::is_convertible<T, int>::value, int>::type
 }

 template <class T, class Policy>
-inline typename std::enable_if<std::is_convertible<T, long>::value, long>::type
+BOOST_MATH_GPU_ENABLED inline boost::math::enable_if_t<boost::math::is_convertible_v<T, long>, long>
    lconvert(const T& v, const Policy&)
 {
    return static_cast<long>(v);
 }

 template <class T, class Policy>
-inline typename std::enable_if<!std::is_convertible<T, long>::value, long>::type
+BOOST_MATH_GPU_ENABLED inline boost::math::enable_if_t<!boost::math::is_convertible_v<T, long>, long>
    lconvert(const T& v, const Policy& pol)
 {
    using boost::math::ltrunc;
@@ -241,14 +351,29 @@ inline typename std::enable_if<!std::is_convertible<T, long>::value, long>::type
 }

 template <class T, class Policy>
-inline typename std::enable_if<std::is_convertible<T, long long>::value, long long>::type
+BOOST_MATH_GPU_ENABLED inline boost::math::enable_if_t<boost::math::is_convertible_v<T, long long>, long long>
+   llconvert(const T& v, const Policy&)
+{
+   return static_cast<long long>(v);
+}
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED inline typename boost::math::enable_if_t<!boost::math::is_convertible_v<T, long long>, long long>
+   llconvert(const T& v, const Policy& pol)
+{
+   using boost::math::lltrunc;
+   return lltrunc(v, pol);
+}
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED [[deprecated("Use llconvert")]] inline boost::math::enable_if_t<boost::math::is_convertible_v<T, long long>, long long>
    llconvertert(const T& v, const Policy&)
 {
    return static_cast<long long>(v);
 }

 template <class T, class Policy>
-inline typename std::enable_if<!std::is_convertible<T, long long>::value, long long>::type
+BOOST_MATH_GPU_ENABLED [[deprecated("Use llconvert")]] inline typename boost::math::enable_if_t<!boost::math::is_convertible_v<T, long long>, long long>
    llconvertert(const T& v, const Policy& pol)
 {
    using boost::math::lltrunc;
diff --git a/include/boost/math/special_functions/ulp.hpp b/include/boost/math/special_functions/ulp.hpp
index 3c0616db0e..5d1617aced 100644
--- a/include/boost/math/special_functions/ulp.hpp
+++ b/include/boost/math/special_functions/ulp.hpp
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include

 namespace boost{ namespace math{ namespace detail{
diff --git a/include/boost/math/tools/array.hpp b/include/boost/math/tools/array.hpp
new file mode 100644
index 0000000000..23e666673c
--- /dev/null
+++ b/include/boost/math/tools/array.hpp
@@ -0,0 +1,41 @@
+//  Copyright (c) 2024 Matt Borland
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+//  Regular use of std::array functions can not be used on
+//  GPU platforms like CUDA since they are missing the __device__ marker
+//  Alias as needed to get correct support
+
+#ifndef BOOST_MATH_TOOLS_ARRAY_HPP
+#define BOOST_MATH_TOOLS_ARRAY_HPP
+
+#include <boost/math/tools/config.hpp>
+
+#ifdef BOOST_MATH_ENABLE_CUDA
+
+#include <cuda/std/array>
+
+namespace boost {
+namespace math {
+
+using cuda::std::array;
+
+} // namespace math
+} // namespace boost
+
+#else
+
+#include <array>
+
+namespace boost {
+namespace math {
+
+using std::array;
+
+} // namespace math
+} // namespace boost
+
+#endif // BOOST_MATH_ENABLE_CUDA
+
+#endif // BOOST_MATH_TOOLS_ARRAY_HPP
diff --git a/include/boost/math/tools/assert.hpp b/include/boost/math/tools/assert.hpp
index 3d5655923a..3f57351fc1 100644
--- a/include/boost/math/tools/assert.hpp
+++ b/include/boost/math/tools/assert.hpp
@@ -10,6 +10,19 @@
 #ifndef BOOST_MATH_TOOLS_ASSERT_HPP
 #define BOOST_MATH_TOOLS_ASSERT_HPP

+#include <boost/math/tools/config.hpp>
+
+#ifdef BOOST_MATH_HAS_GPU_SUPPORT
+
+// Run time asserts are generally unsupported
+
+#define BOOST_MATH_ASSERT(expr)
+#define BOOST_MATH_ASSERT_MSG(expr, msg)
+#define BOOST_MATH_STATIC_ASSERT(expr) static_assert(expr, #expr " failed")
+#define BOOST_MATH_STATIC_ASSERT_MSG(expr, msg) static_assert(expr, msg)
+
+#else
+
 #include <cassert>

 #ifndef BOOST_MATH_STANDALONE
@@ -29,6 +42,8 @@
 #define BOOST_MATH_STATIC_ASSERT(expr) static_assert(expr, #expr " failed")
 #define BOOST_MATH_STATIC_ASSERT_MSG(expr, msg) static_assert(expr, msg)

-#endif
+#endif // Is standalone
+
+#endif // BOOST_MATH_HAS_GPU_SUPPORT

 #endif // BOOST_MATH_TOOLS_ASSERT_HPP
diff --git a/include/boost/math/tools/big_constant.hpp b/include/boost/math/tools/big_constant.hpp
index eaa34dd230..0d54976bc4 100644
--- a/include/boost/math/tools/big_constant.hpp
+++ b/include/boost/math/tools/big_constant.hpp
@@ -8,6 +8,12 @@
 #define BOOST_MATH_TOOLS_BIG_CONSTANT_HPP

 #include <boost/math/tools/config.hpp>
+
+// On NVRTC we don't need any of this
+// We just have a simple definition of the macro since the largest float
+// type on the platform is a 64-bit double
+#ifndef BOOST_MATH_HAS_NVRTC
+
 #ifndef BOOST_MATH_STANDALONE
 #include
 #endif
@@ -43,12 +49,12 @@ typedef double largest_float;
 #endif

 template <class T>
-inline constexpr T make_big_value(largest_float v, const char*, std::true_type const&, std::false_type const&) BOOST_MATH_NOEXCEPT(T)
+BOOST_MATH_GPU_ENABLED constexpr T make_big_value(largest_float v, const char*, std::true_type const&, std::false_type const&) BOOST_MATH_NOEXCEPT(T)
 {
    return static_cast<T>(v);
 }
 template <class T>
-inline constexpr T make_big_value(largest_float v,
const char*, std::true_type const&, std::true_type const&) BOOST_MATH_NOEXCEPT(T) +BOOST_MATH_GPU_ENABLED constexpr T make_big_value(largest_float v, const char*, std::true_type const&, std::true_type const&) BOOST_MATH_NOEXCEPT(T) { return static_cast(v); } @@ -94,5 +100,7 @@ inline constexpr T make_big_value(largest_float, const char* s, std::false_type }}} // namespaces +#endif // BOOST_MATH_HAS_NVRTC + #endif diff --git a/include/boost/math/tools/complex.hpp b/include/boost/math/tools/complex.hpp index d462ca8092..ec51440116 100644 --- a/include/boost/math/tools/complex.hpp +++ b/include/boost/math/tools/complex.hpp @@ -10,9 +10,39 @@ #ifndef BOOST_MATH_TOOLS_COMPLEX_HPP #define BOOST_MATH_TOOLS_COMPLEX_HPP -#include +#include #include +#ifdef BOOST_MATH_ENABLE_CUDA + +#include +#include + +namespace boost { +namespace math { + +template +using complex = cuda::std::complex; + +} // namespace math +} // namespace boost + +#else + +#include +#include + +namespace boost { +namespace math { + +template +using complex = std::complex; + +} // namespace math +} // namespace boost + +#endif + namespace boost { namespace math { namespace tools { @@ -24,12 +54,21 @@ namespace boost { static constexpr bool value = false; }; + #ifndef BOOST_MATH_ENABLE_CUDA template struct is_complex_type_impl().real()), decltype(std::declval().imag())>> { static constexpr bool value = true; }; + #else + template + struct is_complex_type_impl().real()), + decltype(cuda::std::declval().imag())>> + { + static constexpr bool value = true; + }; + #endif } // Namespace detail template diff --git a/include/boost/math/tools/config.hpp b/include/boost/math/tools/config.hpp index 6d962a08a6..12f3411c2f 100644 --- a/include/boost/math/tools/config.hpp +++ b/include/boost/math/tools/config.hpp @@ -11,6 +11,8 @@ #pragma once #endif +#ifndef __CUDACC_RTC__ + #include // Minimum language standard transition @@ -218,12 +220,16 @@ #include -#if (defined(__NetBSD__) || defined(__EMSCRIPTEN__)\ +#if (defined(__NetBSD__)\ || (defined(__hppa) && !defined(__OpenBSD__)) || (defined(__NO_LONG_DOUBLE_MATH) && (DBL_MANT_DIG != LDBL_MANT_DIG))) \ && !defined(BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS) //# define BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS #endif +#if defined(__EMSCRIPTEN__) && !defined(BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS) +# define BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS +#endif + #ifdef __IBMCPP__ // // For reasons I don't understand, the tests with IMB's compiler all @@ -463,7 +469,7 @@ struct non_type {}; #if defined(BOOST_MATH_STANDALONE) && defined(_GLIBCXX_USE_FLOAT128) && defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__) && !defined(__STRICT_ANSI__) \ && !defined(BOOST_MATH_DISABLE_FLOAT128) && !defined(BOOST_MATH_USE_FLOAT128) # define BOOST_MATH_USE_FLOAT128 -#elif defined(BOOST_HAS_FLOAT128) && !defined(BOOST_MATH_USE_FLOAT128) +#elif defined(BOOST_HAS_FLOAT128) && !defined(BOOST_MATH_USE_FLOAT128) && !defined(BOOST_MATH_DISABLE_FLOAT128) # define BOOST_MATH_USE_FLOAT128 #endif #ifdef BOOST_MATH_USE_FLOAT128 @@ -522,7 +528,9 @@ struct non_type {}; using std::ceil;\ using std::floor;\ using std::log10;\ - using std::sqrt; + using std::sqrt;\ + using std::log2;\ + using std::ilogb; #define BOOST_MATH_STD_USING BOOST_MATH_STD_USING_CORE @@ -660,6 +668,184 @@ namespace boost{ namespace math{ #define BOOST_MATH_CONSTEXPR_TABLE_FUNCTION #endif +// +// CUDA support: +// + +#ifdef __CUDACC__ + +// We have to get our include order correct otherwise you get compilation failures +#include 
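// What the markup in this section buys: under nvcc, BOOST_MATH_GPU_ENABLED
// expands to __host__ __device__, so the decorated functions become callable
// from device code. A minimal usage sketch (hypothetical kernel, not part of
// this patch; assumes a CUDA build with this config header active):
//
//    #include <boost/math/special_functions/trigamma.hpp>
//
//    __global__ void trigamma_kernel(const double* in, double* out, int n)
//    {
//       const int i = blockIdx.x * blockDim.x + threadIdx.x;
//       if (i < n)
//       {
//          out[i] = boost::math::trigamma(in[i]);
//       }
//    }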
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+# define BOOST_MATH_CUDA_ENABLED __host__ __device__
+# define BOOST_MATH_HAS_GPU_SUPPORT
+
+# ifndef BOOST_MATH_ENABLE_CUDA
+#   define BOOST_MATH_ENABLE_CUDA
+# endif
+
+// Device code can not handle exceptions
+# ifndef BOOST_MATH_NO_EXCEPTIONS
+#   define BOOST_MATH_NO_EXCEPTIONS
+# endif
+
+// We want to use force inline from CUDA instead of the host compiler
+# undef BOOST_MATH_FORCEINLINE
+# define BOOST_MATH_FORCEINLINE __forceinline__
+
+#elif defined(SYCL_LANGUAGE_VERSION)
+
+# define BOOST_MATH_SYCL_ENABLED SYCL_EXTERNAL
+# define BOOST_MATH_HAS_GPU_SUPPORT
+
+# ifndef BOOST_MATH_ENABLE_SYCL
+#   define BOOST_MATH_ENABLE_SYCL
+# endif
+
+# ifndef BOOST_MATH_NO_EXCEPTIONS
+#   define BOOST_MATH_NO_EXCEPTIONS
+# endif
+
+// spir64 does not support long double
+# define BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
+# define BOOST_MATH_NO_REAL_CONCEPT_TESTS
+
+# undef BOOST_MATH_FORCEINLINE
+# define BOOST_MATH_FORCEINLINE inline
+
+#endif
+
+#ifndef BOOST_MATH_CUDA_ENABLED
+#  define BOOST_MATH_CUDA_ENABLED
+#endif
+
+#ifndef BOOST_MATH_SYCL_ENABLED
+#  define BOOST_MATH_SYCL_ENABLED
+#endif
+
+// Not all functions that allow CUDA allow SYCL (e.g. Recursion is disallowed by SYCL)
+# define BOOST_MATH_GPU_ENABLED BOOST_MATH_CUDA_ENABLED BOOST_MATH_SYCL_ENABLED
+
+// Additional functions that need replaced/marked up
+#ifdef BOOST_MATH_HAS_GPU_SUPPORT
+template <class T>
+BOOST_MATH_GPU_ENABLED constexpr void gpu_safe_swap(T& a, T& b) { T t(a); a = b; b = t; }
+template <class T>
+BOOST_MATH_GPU_ENABLED constexpr T gpu_safe_min(const T& a, const T& b) { return a < b ? a : b; }
+template <class T>
+BOOST_MATH_GPU_ENABLED constexpr T gpu_safe_max(const T& a, const T& b) { return a > b ? a : b; }
+
+#define BOOST_MATH_GPU_SAFE_SWAP(a, b) gpu_safe_swap(a, b)
+#define BOOST_MATH_GPU_SAFE_MIN(a, b) gpu_safe_min(a, b)
+#define BOOST_MATH_GPU_SAFE_MAX(a, b) gpu_safe_max(a, b)
+
+#else
+
+#define BOOST_MATH_GPU_SAFE_SWAP(a, b) std::swap(a, b)
+#define BOOST_MATH_GPU_SAFE_MIN(a, b) (std::min)(a, b)
+#define BOOST_MATH_GPU_SAFE_MAX(a, b) (std::max)(a, b)
+
+#endif
+
+// Static variables are not allowed with CUDA or C++20 modules
+// See if we can inline them instead
+
+#if defined(__cpp_inline_variables) && __cpp_inline_variables >= 201606L
+#  define BOOST_MATH_INLINE_CONSTEXPR inline constexpr
+#  define BOOST_MATH_STATIC static
+#  ifndef BOOST_MATH_HAS_GPU_SUPPORT
+#    define BOOST_MATH_STATIC_LOCAL_VARIABLE static
+#  else
+#    define BOOST_MATH_STATIC_LOCAL_VARIABLE
+#  endif
+#else
+#  ifndef BOOST_MATH_HAS_GPU_SUPPORT
+#    define BOOST_MATH_INLINE_CONSTEXPR static constexpr
+#    define BOOST_MATH_STATIC static
+#    define BOOST_MATH_STATIC_LOCAL_VARIABLE
+#  else
+#    define BOOST_MATH_INLINE_CONSTEXPR constexpr
+#    define BOOST_MATH_STATIC constexpr
+#    define BOOST_MATH_STATIC_LOCAL_VARIABLE static
+#  endif
+#endif
+
+#define BOOST_MATH_FP_NAN FP_NAN
+#define BOOST_MATH_FP_INFINITE FP_INFINITE
+#define BOOST_MATH_FP_ZERO FP_ZERO
+#define BOOST_MATH_FP_SUBNORMAL FP_SUBNORMAL
+#define BOOST_MATH_FP_NORMAL FP_NORMAL
+
+#else // Special section for CUDA NVRTC to ensure we consume no STL headers
+
+#ifndef BOOST_MATH_STANDALONE
+#  define BOOST_MATH_STANDALONE
+#endif
+
+#define BOOST_MATH_HAS_NVRTC
+#define BOOST_MATH_ENABLE_CUDA
+#define BOOST_MATH_HAS_GPU_SUPPORT
+
+#define BOOST_MATH_GPU_ENABLED __host__ __device__
+#define BOOST_MATH_CUDA_ENABLED __host__ __device__
+
+#define BOOST_MATH_STATIC static
+#define BOOST_MATH_STATIC_LOCAL_VARIABLE
+
+#define BOOST_MATH_NOEXCEPT(T) noexcept(boost::math::is_floating_point_v<T>)
+#define BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)
+#define BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T)
+#define BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(T)
+#define BOOST_MATH_BIG_CONSTANT(T, N, V) static_cast<T>(V)
+#define BOOST_MATH_FORCEINLINE __forceinline__
+#define BOOST_MATH_STD_USING
+#define BOOST_MATH_IF_CONSTEXPR if
+#define BOOST_MATH_IS_FLOAT(T) (boost::math::is_floating_point<T>::value)
+#define BOOST_MATH_CONSTEXPR_TABLE_FUNCTION constexpr
+#define BOOST_MATH_NO_EXCEPTIONS
+#define BOOST_MATH_PREVENT_MACRO_SUBSTITUTION
+
+// This should be defined to nothing but since it is not specifically a math macro
+// we need to undef before proceeding
+#ifdef BOOST_FPU_EXCEPTION_GUARD
+#  undef BOOST_FPU_EXCEPTION_GUARD
+#endif
+
+#define BOOST_FPU_EXCEPTION_GUARD
+
+template <class T>
+BOOST_MATH_GPU_ENABLED constexpr void gpu_safe_swap(T& a, T& b) { T t(a); a = b; b = t; }
+
+#define BOOST_MATH_GPU_SAFE_SWAP(a, b) gpu_safe_swap(a, b)
+#define BOOST_MATH_GPU_SAFE_MIN(a, b) (::min)(a, b)
+#define BOOST_MATH_GPU_SAFE_MAX(a, b) (::max)(a, b)
+
+#define BOOST_MATH_FP_NAN 0
+#define BOOST_MATH_FP_INFINITE 1
+#define BOOST_MATH_FP_ZERO 2
+#define BOOST_MATH_FP_SUBNORMAL 3
+#define BOOST_MATH_FP_NORMAL 4
+
+#define BOOST_MATH_INT_VALUE_SUFFIX(RV, SUF) RV##SUF
+#define BOOST_MATH_INT_TABLE_TYPE(RT, IT) IT
+
+#if defined(__cpp_inline_variables) && __cpp_inline_variables >= 201606L
+#  define BOOST_MATH_INLINE_CONSTEXPR inline constexpr
+#else
+#  define BOOST_MATH_INLINE_CONSTEXPR constexpr
+#endif
+
+#define BOOST_MATH_INSTRUMENT_VARIABLE(x)
+#define BOOST_MATH_INSTRUMENT_CODE(x)
+
+#endif // NVRTC

 #endif // BOOST_MATH_TOOLS_CONFIG_HPP
diff --git a/include/boost/math/tools/cstdint.hpp b/include/boost/math/tools/cstdint.hpp
new file mode 100644
index 0000000000..ce2c913b5c
--- /dev/null
+++ b/include/boost/math/tools/cstdint.hpp
@@ -0,0 +1,107 @@
+// Copyright (c) 2024 Matt Borland
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_MATH_TOOLS_CSTDINT
+#define BOOST_MATH_TOOLS_CSTDINT
+
+#include <boost/math/tools/config.hpp>
+
+
+#ifdef BOOST_MATH_ENABLE_CUDA
+
+#include <cuda/std/cstdint>
+
+namespace boost {
+namespace math {
+
+using cuda::std::int8_t;
+using cuda::std::int16_t;
+using cuda::std::int32_t;
+using cuda::std::int64_t;
+
+using cuda::std::int_fast8_t;
+using cuda::std::int_fast16_t;
+using cuda::std::int_fast32_t;
+using cuda::std::int_fast64_t;
+
+using cuda::std::int_least8_t;
+using cuda::std::int_least16_t;
+using cuda::std::int_least32_t;
+using cuda::std::int_least64_t;
+
+using cuda::std::intmax_t;
+using cuda::std::intptr_t;
+
+using cuda::std::uint8_t;
+using cuda::std::uint16_t;
+using cuda::std::uint32_t;
+using cuda::std::uint64_t;
+
+using cuda::std::uint_fast8_t;
+using cuda::std::uint_fast16_t;
+using cuda::std::uint_fast32_t;
+using cuda::std::uint_fast64_t;
+
+using cuda::std::uint_least8_t;
+using cuda::std::uint_least16_t;
+using cuda::std::uint_least32_t;
+using cuda::std::uint_least64_t;
+
+using cuda::std::uintmax_t;
+using cuda::std::uintptr_t;
+
+using size_t = unsigned long;
+
+#else
+
+#include <cstdint>
+
+namespace boost {
+namespace math {
+
+using std::int8_t;
+using std::int16_t;
+using std::int32_t;
+using std::int64_t;
+
+using std::int_fast8_t;
+using std::int_fast16_t;
+using std::int_fast32_t;
+using std::int_fast64_t;
+
+using std::int_least8_t;
+using std::int_least16_t;
+using std::int_least32_t;
+using std::int_least64_t;
+
+using std::intmax_t;
+using std::intptr_t;
+
+using std::uint8_t;
+using std::uint16_t;
+using std::uint32_t;
+using std::uint64_t;
+
+using std::uint_fast8_t;
+using std::uint_fast16_t;
+using std::uint_fast32_t;
+using std::uint_fast64_t;
+
+using std::uint_least8_t;
+using std::uint_least16_t;
+using std::uint_least32_t;
+using std::uint_least64_t;
+
+using std::uintmax_t;
+using std::uintptr_t;
+
+using std::size_t;
+
+#endif
+
+} // namespace math
+} // namespace boost
+
+#endif // BOOST_MATH_TOOLS_CSTDINT
diff --git a/include/boost/math/tools/detail/polynomial_horner1_10.hpp b/include/boost/math/tools/detail/polynomial_horner1_10.hpp
index 6876af2d24..04ad90b69b 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_10.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_10.hpp
@@ -12,67 +12,67 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const
boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_11.hpp b/include/boost/math/tools/detail/polynomial_horner1_11.hpp index a5154c7a68..f99ab82507 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_11.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_11.hpp @@ -12,73 +12,73 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V 
evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_12.hpp b/include/boost/math/tools/detail/polynomial_horner1_12.hpp index 82bf88c28e..3006ebe51e 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_12.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_12.hpp @@ -12,79 +12,79 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { 
return static_cast(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_13.hpp b/include/boost/math/tools/detail/polynomial_horner1_13.hpp index f61c553dd9..0f11189097 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_13.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_13.hpp @@ -12,85 +12,85 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const 
boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_14.hpp b/include/boost/math/tools/detail/polynomial_horner1_14.hpp index 76e9f07b25..caba4b97ea 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_14.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_14.hpp @@ -12,91 +12,91 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return 
diff --git a/include/boost/math/tools/detail/polynomial_horner1_15.hpp b/include/boost/math/tools/detail/polynomial_horner1_15.hpp
index bca8cf7241..c8f42ac813 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_15.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_15.hpp
@@ -12,97 +12,97 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(0); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); }
diff --git a/include/boost/math/tools/detail/polynomial_horner1_16.hpp b/include/boost/math/tools/detail/polynomial_horner1_16.hpp
index 16ddb081dd..2ed591ccf5 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_16.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_16.hpp
@@ -12,103 +12,103 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(0); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); }
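These detail overloads are not called directly: they sit behind boost::math::tools::evaluate_polynomial, which dispatches to the unrolled variant matching the coefficient count at compile time (which polynomial_horner*_N.hpp family gets included is controlled by BOOST_MATH_POLY_METHOD). A minimal usage sketch, with an illustrative coefficient array (the values are made up for the example):

#include <boost/math/tools/rational.hpp>   // declares evaluate_polynomial
#include <iostream>

int main()
{
   // Coefficients in increasing order of degree: 2x^3 - 3x^2 + 1
   static const double c[4] = { 1.0, 0.0, -3.0, 2.0 };
   // The array extent (4) selects the count-4 unrolled overload.
   std::cout << boost::math::tools::evaluate_polynomial(c, 0.5) << '\n';   // prints 0.5
}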
diff --git a/include/boost/math/tools/detail/polynomial_horner1_17.hpp b/include/boost/math/tools/detail/polynomial_horner1_17.hpp
index 5828621fb8..5e9fc8cd7c 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_17.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_17.hpp
@@ -12,109 +12,109 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(0); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>((((((((((((((((a[16] * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); }
diff --git a/include/boost/math/tools/detail/polynomial_horner1_18.hpp b/include/boost/math/tools/detail/polynomial_horner1_18.hpp
index a2a1c12f4c..ffb62ff049 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_18.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_18.hpp
@@ -12,115 +12,115 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(0); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(((((((((((((((((a[17] * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); }
diff --git a/include/boost/math/tools/detail/polynomial_horner1_19.hpp b/include/boost/math/tools/detail/polynomial_horner1_19.hpp
index 83ede26b5a..56df108ac8 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_19.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_19.hpp
@@ -12,121 +12,121 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(0); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>((((((((((((((((((a[18] * x + a[17]) * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); }
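Every hunk in these generated headers follows the same shape: the unused pointer-to-integral_constant parameter exists only so overload resolution can pick, at compile time, the version unrolled for exactly N coefficients. A self-contained sketch of that tag-dispatch idiom (simplified names, not the library's actual code):

#include <cstddef>
#include <type_traits>
#include <iostream>

// One overload per coefficient count; the tag parameter carries the count.
template <class T, class V>
inline V eval_imp(const T* a, const V& x, const std::integral_constant<int, 2>*)
{ return static_cast<V>(a[1] * x + a[0]); }

template <class T, class V>
inline V eval_imp(const T* a, const V& x, const std::integral_constant<int, 3>*)
{ return static_cast<V>((a[2] * x + a[1]) * x + a[0]); }

// The array extent N becomes the tag, so the right overload is chosen
// with no runtime branching.
template <std::size_t N, class T, class V>
inline V eval(const T (&a)[N], const V& x)
{
   return eval_imp(static_cast<const T*>(a), x,
                   static_cast<const std::integral_constant<int, N>*>(nullptr));
}

int main()
{
   double c[3] = { 1.0, 2.0, 3.0 };   // 3x^2 + 2x + 1
   std::cout << eval(c, 2.0) << '\n'; // prints 17
}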
diff --git a/include/boost/math/tools/detail/polynomial_horner1_2.hpp b/include/boost/math/tools/detail/polynomial_horner1_2.hpp
index 93d0f7c9c8..63091ebddd 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_2.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_2.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(0); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(a[0]); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(a[1] * x + a[0]); }
diff --git a/include/boost/math/tools/detail/polynomial_horner1_20.hpp b/include/boost/math/tools/detail/polynomial_horner1_20.hpp
index d770209113..c16e5143ec 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_20.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_20.hpp
@@ -12,127 +12,127 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(0); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 20>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 20>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(((((((((((((((((((a[19] * x + a[18]) * x + a[17]) * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); }
diff --git a/include/boost/math/tools/detail/polynomial_horner1_3.hpp b/include/boost/math/tools/detail/polynomial_horner1_3.hpp
index 0fde1a7430..0aeccc1115 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_3.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_3.hpp
@@ -12,25 +12,25 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(0); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(a[0]); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(a[1] * x + a[0]); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>((a[2] * x + a[1]) * x + a[0]); }
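The two substitutions repeated through every hunk, adding BOOST_MATH_GPU_ENABLED to each signature and replacing std::integral_constant with boost::math::integral_constant, are what let these headers compile as device code: under NVRTC in particular the host standard library is unavailable, so the library carries its own integral_constant. As a rough sketch of the idea only (the real definitions live in Boost.Math's config and type-traits headers and also cover SYCL):

// Illustrative approximation, not the library's actual configuration logic.
#if defined(__CUDACC__) || defined(__CUDACC_RTC__)
#  define BOOST_MATH_GPU_ENABLED __host__ __device__
#else
#  define BOOST_MATH_GPU_ENABLED
#endif

namespace boost { namespace math {
// Standalone replacement so device code need not pull in <type_traits>.
template <class T, T v>
struct integral_constant
{
   static constexpr T value = v;
   using value_type = T;
   BOOST_MATH_GPU_ENABLED constexpr operator value_type() const noexcept { return value; }
};
}} // namespace boost::math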
const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_5.hpp b/include/boost/math/tools/detail/polynomial_horner1_5.hpp index 64dc00251d..47021bc509 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_5.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_5.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_6.hpp b/include/boost/math/tools/detail/polynomial_horner1_6.hpp index dbc06347f3..bfd24371d5 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_6.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_6.hpp @@ -12,43 +12,43 @@ namespace boost{ namespace math{ namespace tools{ namespace 
detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_7.hpp b/include/boost/math/tools/detail/polynomial_horner1_7.hpp index 1472b2ede0..50ddca63ff 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_7.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_7.hpp @@ -12,49 +12,49 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V 
diff --git a/include/boost/math/tools/detail/polynomial_horner1_7.hpp b/include/boost/math/tools/detail/polynomial_horner1_7.hpp
index 1472b2ede0..50ddca63ff 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_7.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_7.hpp
@@ -12,49 +12,49 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

diff --git a/include/boost/math/tools/detail/polynomial_horner1_8.hpp b/include/boost/math/tools/detail/polynomial_horner1_8.hpp
index 95edfa0c60..3be7ba4d16 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_8.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_8.hpp
@@ -12,55 +12,55 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

diff --git a/include/boost/math/tools/detail/polynomial_horner1_9.hpp b/include/boost/math/tools/detail/polynomial_horner1_9.hpp
index f434a26c4b..4ec53c48bd 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_9.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_9.hpp
@@ -12,61 +12,61 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }
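The polynomial_horner2_N headers that follow use a second-order Horner scheme: the coefficients are split into even-index and odd-index groups, each evaluated as a Horner chain in x*x, so two shorter dependency chains can overlap in the pipeline instead of one long serial chain. A generic sketch of the same recurrence (illustrative only; the generated headers interleave the two chains explicitly per degree):

#include <cstddef>

// Second-order Horner: p(x) = E(x*x) + x * O(x*x), where E holds the
// even-index coefficients and O the odd-index ones.
template <class T, class V, std::size_t N>
V horner2_sketch(const T (&a)[N], V x)
{
   V x2 = x * x;
   V even = static_cast<V>(0);
   V odd = static_cast<V>(0);
   for (std::size_t i = N; i-- > 0;)
   {
      if (i % 2 == 0)
         even = even * x2 + a[i];
      else
         odd = odd * x2 + a[i];
   }
   return static_cast<V>(even + odd * x);
}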
diff --git a/include/boost/math/tools/detail/polynomial_horner2_10.hpp b/include/boost/math/tools/detail/polynomial_horner2_10.hpp
index 1fce239a47..f242d7464e 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_10.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_10.hpp
@@ -12,72 +12,72 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

diff --git a/include/boost/math/tools/detail/polynomial_horner2_11.hpp b/include/boost/math/tools/detail/polynomial_horner2_11.hpp
index 3cf086c3b1..edf7f86c52 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_11.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_11.hpp
@@ -12,79 +12,79 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }
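Throughout the patch every overload gains a BOOST_MATH_GPU_ENABLED prefix. The macro's definition is not shown in these hunks; a plausible shape, assuming the usual CUDA convention for host/device-callable headers, would be the following hypothetical sketch (not the library's actual config):

// Hypothetical: mark functions callable from both host and device code
// under nvcc, and compile away to nothing for ordinary CPU builds.
#ifdef __CUDACC__
#  define BOOST_MATH_GPU_ENABLED __host__ __device__
#else
#  define BOOST_MATH_GPU_ENABLED
#endif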
diff --git a/include/boost/math/tools/detail/polynomial_horner2_12.hpp b/include/boost/math/tools/detail/polynomial_horner2_12.hpp
index e9f8eae7c6..969c9c4ddd 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_12.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_12.hpp
@@ -12,86 +12,86 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

diff --git a/include/boost/math/tools/detail/polynomial_horner2_13.hpp b/include/boost/math/tools/detail/polynomial_horner2_13.hpp
index d9d2a5e24a..ed4559d11e 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_13.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_13.hpp
@@ -12,93 +12,93 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

diff --git a/include/boost/math/tools/detail/polynomial_horner2_14.hpp b/include/boost/math/tools/detail/polynomial_horner2_14.hpp
index b4280597a8..4b79eb78a4 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_14.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_14.hpp
@@ -12,100 +12,100 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }
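The other systematic change in every hunk is std::integral_constant becoming boost::math::integral_constant. The replacement type is presumably a drop-in clone that stays usable where the standard <type_traits> header is unavailable (for example under NVRTC). A minimal sketch of that shape, assuming it mirrors the standard template (the actual definition lives in the library's GPU-support headers, not in this patch):

namespace boost { namespace math {

// Assumed drop-in replacement for std::integral_constant.
template <class T, T v>
struct integral_constant
{
   static constexpr T value = v;
   using value_type = T;
   using type = integral_constant;
   constexpr operator value_type() const noexcept { return value; }
};

}} // namespace boost::math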
diff --git a/include/boost/math/tools/detail/polynomial_horner2_15.hpp b/include/boost/math/tools/detail/polynomial_horner2_15.hpp
index 89a7a46f53..28b62eee75 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_15.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_15.hpp
@@ -12,107 +12,107 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

diff --git a/include/boost/math/tools/detail/polynomial_horner2_16.hpp b/include/boost/math/tools/detail/polynomial_horner2_16.hpp
index d2379d2bc1..6368b40548 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_16.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_16.hpp
@@ -12,114 +12,114 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }
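In every overload the final, otherwise-unused const integral_constant<int, N>* parameter carries the coefficient count in its type, so overload resolution selects the matching unrolled body at compile time with no runtime cost. A hypothetical caller showing that dispatch (every name other than evaluate_polynomial_c_imp and boost::math::integral_constant is illustrative):

// Tag dispatch sketch: encode the coefficient count N in a type and pass
// a null tag pointer; the compiler picks the unrolled overload for N.
template <class T, class V, int N>
V evaluate_fixed_sketch(const T* a, const V& x)
{
   using tag = boost::math::integral_constant<int, N>;
   return evaluate_polynomial_c_imp(a, x, static_cast<const tag*>(nullptr));
}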
diff --git a/include/boost/math/tools/detail/polynomial_horner2_17.hpp b/include/boost/math/tools/detail/polynomial_horner2_17.hpp
index d1921efc49..551e6191cf 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_17.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_17.hpp
@@ -12,121 +12,121 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((((a[16] * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }
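For context, these detail overloads sit behind the public entry point boost::math::tools::evaluate_polynomial, which dispatches on the coefficient array's size. A small usage example (assuming the usual rational.hpp header provides it, as in current Boost.Math):

#include <boost/math/tools/rational.hpp>

int main()
{
   // p(x) = 1 + 2x + 3x^2, evaluated at x = 0.5 via the unrolled path.
   static const double c[3] = { 1.0, 2.0, 3.0 };
   double y = boost::math::tools::evaluate_polynomial(c, 0.5);
   return y == 2.75 ? 0 : 1;
}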
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((((a[16] * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((((a[17] * x2 + a[15]) * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((((a[16] * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner2_19.hpp b/include/boost/math/tools/detail/polynomial_horner2_19.hpp
index a3049354ca..9ea87fd93b 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_19.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_19.hpp
@@ -12,135 +12,135 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((((a[16] * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((((a[17] * x2 + a[15]) * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((((a[16] * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((((a[18] * x2 + a[16]) * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((((a[17] * x2 + a[15]) * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner2_2.hpp b/include/boost/math/tools/detail/polynomial_horner2_2.hpp
index 8b3a7dcd83..1982a81f3f 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_2.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_2.hpp
@@ -12,31 +12,31 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner2_20.hpp b/include/boost/math/tools/detail/polynomial_horner2_20.hpp
index a4ccc93b3e..23afe55e05 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_20.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_20.hpp
@@ -12,142 +12,142 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((((a[16] * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((((a[17] * x2 + a[15]) * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((((a[16] * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((((a[18] * x2 + a[16]) * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((((a[17] * x2 + a[15]) * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 20>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 20>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((((((a[19] * x2 + a[17]) * x2 + a[15]) * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((((((a[18] * x2 + a[16]) * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner2_3.hpp b/include/boost/math/tools/detail/polynomial_horner2_3.hpp
index d0b988cf81..f9d6953b82 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_3.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_3.hpp
@@ -12,31 +12,31 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner2_4.hpp b/include/boost/math/tools/detail/polynomial_horner2_4.hpp
index 7f0708680c..8f11de5b31 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_4.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_4.hpp
@@ -12,31 +12,31 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner2_5.hpp b/include/boost/math/tools/detail/polynomial_horner2_5.hpp
index f4e7b809b0..eba9ee9e6d 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_5.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_5.hpp
@@ -12,37 +12,37 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner2_6.hpp b/include/boost/math/tools/detail/polynomial_horner2_6.hpp
index 764e522505..ef77c6255b 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_6.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_6.hpp
@@ -12,44 +12,44 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner2_7.hpp b/include/boost/math/tools/detail/polynomial_horner2_7.hpp
index 50fb3333cb..fe8d21b95f 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_7.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_7.hpp
@@ -12,51 +12,51 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner2_8.hpp b/include/boost/math/tools/detail/polynomial_horner2_8.hpp
index c74b19d435..de1810a940 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_8.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_8.hpp
@@ -12,58 +12,58 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner2_9.hpp b/include/boost/math/tools/detail/polynomial_horner2_9.hpp
index 7d6e7e421f..5c53b73299 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_9.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_9.hpp
@@ -12,65 +12,65 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner3_10.hpp b/include/boost/math/tools/detail/polynomial_horner3_10.hpp
index b980b1b3d2..7fb5bb4745 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_10.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_10.hpp
@@ -12,37 +12,37 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
diff --git a/include/boost/math/tools/detail/polynomial_horner3_11.hpp b/include/boost/math/tools/detail/polynomial_horner3_11.hpp
index 2ab4b2ac3a..9f22820dea 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_11.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_11.hpp
@@ -12,37 +12,37 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
diff --git a/include/boost/math/tools/detail/polynomial_horner3_12.hpp b/include/boost/math/tools/detail/polynomial_horner3_12.hpp
index 4606427277..b049613766 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_12.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_12.hpp
@@ -12,37 +12,37 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
diff --git a/include/boost/math/tools/detail/polynomial_horner3_13.hpp b/include/boost/math/tools/detail/polynomial_horner3_13.hpp
index d35fa904f2..f39a33cc90 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_13.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_13.hpp
@@ -12,37 +12,37 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
diff --git a/include/boost/math/tools/detail/polynomial_horner3_14.hpp b/include/boost/math/tools/detail/polynomial_horner3_14.hpp
index 346b9dc28e..32b9e7db29 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_14.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_14.hpp
@@ -12,37 +12,37 @@
namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } 
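// The hunks above and below all apply one mechanical substitution to these generated
// Horner headers. Written out with the angle-bracket arguments restored (a sketch,
// assuming the usual template <class T, class V> parameter list of these headers and a
// coefficient-count tag N that runs 0..max per file), each change is:
//
//    template <class T, class V>
//   -inline V evaluate_polynomial_c_imp(const T* a, const V& x,
//   -   const std::integral_constant<int, N>*) BOOST_MATH_NOEXCEPT(V)
//   +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x,
//   +   const boost::math::integral_constant<int, N>*) BOOST_MATH_NOEXCEPT(V)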
template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -231,7 +231,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_15.hpp b/include/boost/math/tools/detail/polynomial_horner3_15.hpp index 500bc32317..55325c84b9 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_15.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_15.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const 
std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) 
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -231,7 +231,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -262,7 +262,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_16.hpp b/include/boost/math/tools/detail/polynomial_horner3_16.hpp index 269f367390..f71d62f50c 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_16.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_16.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V 
x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -231,7 +231,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -262,7 +262,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 
= x * x; V t[2]; @@ -295,7 +295,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_17.hpp b/include/boost/math/tools/detail/polynomial_horner3_17.hpp index 1d97a6f154..783a34558c 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_17.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_17.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -231,7 +231,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -262,7 +262,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -295,7 +295,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -330,7 +330,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_18.hpp b/include/boost/math/tools/detail/polynomial_horner3_18.hpp index 80e49cbb12..b10b270c41 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_18.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_18.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -231,7 +231,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -262,7 +262,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -295,7 +295,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -330,7 +330,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -367,7 +367,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_19.hpp b/include/boost/math/tools/detail/polynomial_horner3_19.hpp index eae3775e06..21147591c8 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_19.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_19.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 
@@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -231,7 +231,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -262,7 +262,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -295,7 +295,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -330,7 +330,7 
@@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -367,7 +367,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -406,7 +406,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_2.hpp b/include/boost/math/tools/detail/polynomial_horner3_2.hpp index 6281674205..ee3e35e6ca 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_2.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_2.hpp @@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner3_20.hpp b/include/boost/math/tools/detail/polynomial_horner3_20.hpp index 00f8caae2a..338aeb7dbc 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_20.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_20.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const 
std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) 
BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -231,7 +231,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -262,7 +262,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -295,7 +295,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -330,7 +330,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -367,7 +367,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -406,7 +406,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) 
BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -447,7 +447,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
diff --git a/include/boost/math/tools/detail/polynomial_horner3_3.hpp b/include/boost/math/tools/detail/polynomial_horner3_3.hpp
index 8f69c2bfef..1eee0cfac0 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_3.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_3.hpp
@@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner3_4.hpp b/include/boost/math/tools/detail/polynomial_horner3_4.hpp
index 34db812343..efa7fba485 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_4.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_4.hpp
@@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner3_5.hpp b/include/boost/math/tools/detail/polynomial_horner3_5.hpp
index ed955e4a70..f150e2a4a4 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_5.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_5.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
diff --git a/include/boost/math/tools/detail/polynomial_horner3_6.hpp b/include/boost/math/tools/detail/polynomial_horner3_6.hpp
index 96d9a6ddad..fe679e74d2 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_6.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_6.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
diff --git a/include/boost/math/tools/detail/polynomial_horner3_7.hpp b/include/boost/math/tools/detail/polynomial_horner3_7.hpp
index 80a9f3af4a..76f080ad9c 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_7.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_7.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
diff --git a/include/boost/math/tools/detail/polynomial_horner3_8.hpp b/include/boost/math/tools/detail/polynomial_horner3_8.hpp
index ee526ad736..75634bdfc6 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_8.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_8.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
diff --git a/include/boost/math/tools/detail/polynomial_horner3_9.hpp b/include/boost/math/tools/detail/polynomial_horner3_9.hpp
index a17ce909c7..63a40580d1 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_9.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_9.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
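Every hunk above and below applies the same mechanical edit to Boost.Math's generated Horner-evaluation headers: prefix each detail overload with BOOST_MATH_GPU_ENABLED and swap the std::integral_constant dispatch tag for boost::math::integral_constant. A minimal C++ sketch of the pattern on one overload follows, with the template parameter lists and tag arguments written out in full; the macro expansions and the boost::math::integral_constant definition shown here are illustrative assumptions about the supporting headers, not part of this diff:

    // Sketch only: assumed stand-ins for Boost.Math's real support headers.
    #include <type_traits>

    #ifdef __CUDACC__
    #  define BOOST_MATH_GPU_ENABLED __host__ __device__  // assumed expansion under CUDA
    #else
    #  define BOOST_MATH_GPU_ENABLED                      // host-only builds: expands to nothing
    #endif
    #define BOOST_MATH_NOEXCEPT(T) noexcept(std::is_floating_point<T>::value)

    namespace boost { namespace math {
    // Assumed GPU-safe mirror of std::integral_constant, usable in device code.
    template <class T, T v>
    struct integral_constant { static constexpr T value = v; };
    }} // namespace boost::math

    namespace boost { namespace math { namespace tools { namespace detail {
    // Post-patch form of the three-coefficient overload: the Horner body is
    // unchanged; the function is now callable from both host and device.
    template <class T, class V>
    BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x,
       const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
    {
       return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
    }
    }}}} // namespace boost::math::tools::detail

The int parameter of the tag is the coefficient count, so each fixed polynomial size dispatches at compile time to a fully unrolled overload; the rational_horner headers that follow use the same tag with an extra denominator-coefficient pointer.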
diff --git a/include/boost/math/tools/detail/rational_horner1_10.hpp b/include/boost/math/tools/detail/rational_horner1_10.hpp
index 6a04128ca6..e2f6c6d2fb 100644
--- a/include/boost/math/tools/detail/rational_horner1_10.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_10.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_11.hpp b/include/boost/math/tools/detail/rational_horner1_11.hpp
index d43e53433f..31d480a65a 100644
--- a/include/boost/math/tools/detail/rational_horner1_11.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_11.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_12.hpp b/include/boost/math/tools/detail/rational_horner1_12.hpp
index 33d19eb380..c08a85b3a6 100644
--- a/include/boost/math/tools/detail/rational_horner1_12.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_12.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_13.hpp b/include/boost/math/tools/detail/rational_horner1_13.hpp
index 2069aa5150..cc87ec2dc7 100644
--- a/include/boost/math/tools/detail/rational_horner1_13.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_13.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_14.hpp b/include/boost/math/tools/detail/rational_horner1_14.hpp
index 5ebcde6260..256473710f 100644
--- a/include/boost/math/tools/detail/rational_horner1_14.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_14.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -168,7 +168,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((b[13] * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_15.hpp b/include/boost/math/tools/detail/rational_horner1_15.hpp
index 9da8e1b711..2ab24814e7 100644
--- a/include/boost/math/tools/detail/rational_horner1_15.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_15.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -168,7 +168,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((b[13] * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((b[14] * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_16.hpp b/include/boost/math/tools/detail/rational_horner1_16.hpp
index 203ba78196..dce0b5e9b1 100644
--- a/include/boost/math/tools/detail/rational_horner1_16.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_16.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x +
b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -168,7 +168,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((b[13] * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((b[14] * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -192,7 +192,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + 
a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((b[15] * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_17.hpp b/include/boost/math/tools/detail/rational_horner1_17.hpp index e382d2931a..8e875d6576 100644 --- a/include/boost/math/tools/detail/rational_horner1_17.hpp +++ b/include/boost/math/tools/detail/rational_horner1_17.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); @@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); @@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, 
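Editorial note: every generated header in this diff follows one pattern. evaluate_rational_c_imp is overloaded on an integral_constant<int, N> tag, where N is the coefficient count, and each overload evaluates the numerator and denominator polynomials with a fully hand-unrolled Horner scheme. A minimal loop-based sketch of the same idea (illustrative names, not the library's generated code):

// Sketch only: a loop-based equivalent of the unrolled overloads above.
#include <array>
#include <cstddef>

template <class V, std::size_t N>
constexpr V horner(const std::array<V, N>& c, V x)
{
   V result = c[N - 1];
   for (std::size_t i = N - 1; i > 0; --i)
      result = result * x + c[i - 1];   // ((c[N-1]*x + c[N-2])*x + ...)*x + c[0]
   return result;
}

template <class V, std::size_t N>
constexpr V eval_rational(const std::array<V, N>& a, const std::array<V, N>& b, V x)
{
   return horner(a, x) / horner(b, x);  // numerator / denominator
}

The generated files unroll this by hand for each N so that no loop or recursion survives into the object code, which is also why the diff touches one near-identical function per degree.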
diff --git a/include/boost/math/tools/detail/rational_horner1_17.hpp b/include/boost/math/tools/detail/rational_horner1_17.hpp
index e382d2931a..8e875d6576 100644
--- a/include/boost/math/tools/detail/rational_horner1_17.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_17.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -168,7 +168,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((b[13] * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((b[14] * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -192,7 +192,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((b[15] * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -204,7 +204,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((((((a[16] * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((((b[16] * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
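The if((-1 <= x) && (x <= 1)) guard that opens every overload matters for stability: each hunk here shows only the in-range branch, while the unchanged (and therefore elided) else branch evaluates in z = 1/x with the coefficient order reversed, so large |x| cannot blow up the partial sums. A hedged sketch of that two-branch shape for the three-coefficient case (illustrative, not the generated text):

// Sketch of the two-branch evaluation; the else branch is what the
// truncated hunk context above omits.
template <class V>
V eval_rational_3(const V* a, const V* b, V x)
{
   if ((-1 <= x) && (x <= 1))
      return ((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]);
   V z = 1 / x;                             // evaluate in 1/x for |x| > 1
   return ((a[0] * z + a[1]) * z + a[2]) / ((b[0] * z + b[1]) * z + b[2]);
}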
diff --git a/include/boost/math/tools/detail/rational_horner1_18.hpp b/include/boost/math/tools/detail/rational_horner1_18.hpp
index 66f668ee35..ab67a970b0 100644
--- a/include/boost/math/tools/detail/rational_horner1_18.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_18.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -168,7 +168,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((b[13] * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((b[14] * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -192,7 +192,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((b[15] * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -204,7 +204,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((((((a[16] * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((((b[16] * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -216,7 +216,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((((((a[17] * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((((b[17] * x + b[16]) * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
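Every hunk in this diff applies the same two mechanical substitutions: prefix the function with BOOST_MATH_GPU_ENABLED, and swap std::integral_constant for boost::math::integral_constant. Roughly, the shims behind those substitutions look like the following (an assumed shape for illustration only; the authoritative definitions live in the library's GPU-support headers):

// Assumed shape of the portability shims, not the library's exact source.
#ifdef __CUDACC__
#  define BOOST_MATH_GPU_ENABLED __host__ __device__   // callable from host and device
#else
#  define BOOST_MATH_GPU_ENABLED                       // expands to nothing on CPU builds
#endif

namespace boost { namespace math {
// Mirror of std::integral_constant so device code need not pull in <type_traits>.
template <class T, T v>
struct integral_constant
{
   static constexpr T value = v;
   using value_type = T;
   constexpr operator value_type() const noexcept { return value; }
};
}} // namespace boost::math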
diff --git a/include/boost/math/tools/detail/rational_horner1_19.hpp b/include/boost/math/tools/detail/rational_horner1_19.hpp
index 9cd1391434..dc300343a5 100644
--- a/include/boost/math/tools/detail/rational_horner1_19.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_19.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -168,7 +168,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((b[13] * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((b[14] * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -192,7 +192,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((b[15] * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -204,7 +204,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((((((a[16] * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((((b[16] * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -216,7 +216,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((((((a[17] * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((((b[17] * x + b[16]) * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -228,7 +228,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((((((((a[18] * x + a[17]) * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((((((b[18] * x + b[17]) * x + b[16]) * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
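For context, callers never touch these detail overloads directly: the public entry point boost::math::tools::evaluate_rational deduces the coefficient-array extent and dispatches to the matching unrolled specialization. A small usage sketch (placeholder coefficients, not a real approximation):

#include <boost/math/tools/rational.hpp>

double approx(double x)
{
   static const double num[4]   = { 1.0, 0.5, 0.25, 0.125 };
   static const double denom[4] = { 1.0, 0.3, 0.2,  0.1   };
   // The array extent (4) selects the integral_constant<int, 4> overload above.
   return boost::math::tools::evaluate_rational(num, denom, x);
}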
evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_20.hpp b/include/boost/math/tools/detail/rational_horner1_20.hpp index 0a6c2a0f26..5b8b170c15 100644 --- a/include/boost/math/tools/detail/rational_horner1_20.hpp +++ b/include/boost/math/tools/detail/rational_horner1_20.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); @@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); @@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -72,7 +72,7 @@ inline V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 
1))
       return static_cast<V>((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -168,7 +168,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((b[13] * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((b[14] * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -192,7 +192,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((b[15] * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -204,7 +204,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((((((a[16] * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((((b[16] * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -216,7 +216,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((((((a[17] * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((((b[17] * x + b[16]) * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -228,7 +228,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((((((((a[18] * x + a[17]) * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((((((b[18] * x + b[17]) * x + b[16]) * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -240,7 +240,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 20>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 20>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((((((((a[19] * x + a[18]) * x + a[17]) * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((((((b[19] * x + b[18]) * x + b[17]) * x + b[16]) * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_3.hpp b/include/boost/math/tools/detail/rational_horner1_3.hpp
index d0ab213b3c..6933e22bf1 100644
--- a/include/boost/math/tools/detail/rational_horner1_3.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_3.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
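The change repeated through these generated headers is mechanical: every fixed-size evaluate_rational_c_imp overload gains the BOOST_MATH_GPU_ENABLED decoration and takes its dispatch tag from boost::math::integral_constant instead of std::integral_constant, so the whole overload set can be instantiated inside device code. A minimal sketch of the size-tag dispatch these overloads implement (names simplified, not the literal library source; BOOST_MATH_GPU_ENABLED is stubbed out for a host-only build):

#ifndef BOOST_MATH_GPU_ENABLED
#  define BOOST_MATH_GPU_ENABLED   // host-only stub; CUDA builds would use __host__ __device__
#endif
#include <cstddef>

template <int N> struct size_tag {};   // stand-in for boost::math::integral_constant<int, N>

template <class T, class V>
BOOST_MATH_GPU_ENABLED inline V rational_imp(const T* a, const T* b, const V&, const size_tag<1>*)
{
   return static_cast<V>(a[0]) / static_cast<V>(b[0]);
}

template <class T, class V>
BOOST_MATH_GPU_ENABLED inline V rational_imp(const T* a, const T* b, const V& x, const size_tag<2>*)
{
   return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
}

// The caller selects the unrolled overload at compile time from the array length:
template <std::size_t N, class T, class V>
BOOST_MATH_GPU_ENABLED inline V rational(const T (&a)[N], const T (&b)[N], const V& x)
{
   return rational_imp(a, b, x, static_cast<const size_tag<(int)N>*>(nullptr));
}

int main()
{
   const double p[2] = {1.0, 2.0}, q[2] = {3.0, 4.0};
   return rational(p, q, 0.5) > 0 ? 0 : 1;   // (1 + 2x)/(3 + 4x) at x = 0.5
}

Because the tag argument is a null pointer to a distinct type per size, the dispatch costs nothing at run time; the patch only has to change the type the tag is spelled in.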
diff --git a/include/boost/math/tools/detail/rational_horner1_4.hpp b/include/boost/math/tools/detail/rational_horner1_4.hpp
index 44f40114a1..49b9835778 100644
--- a/include/boost/math/tools/detail/rational_horner1_4.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_4.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_5.hpp b/include/boost/math/tools/detail/rational_horner1_5.hpp
index db032f15e1..91e97ff445 100644
--- a/include/boost/math/tools/detail/rational_horner1_5.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_5.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_6.hpp b/include/boost/math/tools/detail/rational_horner1_6.hpp
index 4de5143ca9..876b026cde 100644
--- a/include/boost/math/tools/detail/rational_horner1_6.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_6.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
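Each unrolled body guards with if((-1 <= x) && (x <= 1)) and evaluates the two Horner chains directly only on that interval. The else branches fall outside these hunks, but the standard trick they apply is to evaluate in z = 1/x with the coefficient order reversed, which keeps the intermediate Horner terms bounded for large |x|. A self-contained sketch for the three-coefficient case (illustrative, not the library source):

#ifndef BOOST_MATH_GPU_ENABLED
#  define BOOST_MATH_GPU_ENABLED
#endif

template <class T, class V>
BOOST_MATH_GPU_ENABLED inline V rational_3(const T* a, const T* b, const V& x)
{
   if((-1 <= x) && (x <= 1))
      return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) /
                            ((b[2] * x + b[1]) * x + b[0]));
   // For |x| > 1, P(x)/Q(x) == P*(z)/Q*(z) with z = 1/x and the
   // coefficients reversed, since the common factor x^2 cancels:
   V z = 1 / x;
   return static_cast<V>(((a[0] * z + a[1]) * z + a[2]) /
                         ((b[0] * z + b[1]) * z + b[2]));
}

int main()
{
   const double p[3] = {1.0, 0.0, 1.0}, q[3] = {2.0, 0.0, 1.0};
   // (1 + x^2) / (2 + x^2) at x = 10 -> 101/102, computed via the z = 1/x branch
   return rational_3(p, q, 10.0) > 0.9 ? 0 : 1;
}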
diff --git a/include/boost/math/tools/detail/rational_horner1_7.hpp b/include/boost/math/tools/detail/rational_horner1_7.hpp
index 7d4ef69e9a..bcac18293c 100644
--- a/include/boost/math/tools/detail/rational_horner1_7.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_7.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_8.hpp b/include/boost/math/tools/detail/rational_horner1_8.hpp
index bf4d7f57e3..55e30a53e8 100644
--- a/include/boost/math/tools/detail/rational_horner1_8.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_8.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_9.hpp b/include/boost/math/tools/detail/rational_horner1_9.hpp
index cf3be7f824..c7087de508 100644
--- a/include/boost/math/tools/detail/rational_horner1_9.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_9.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
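From here the patch moves on to the rational_horner2_*.hpp family. These differ from the horner1 files in evaluation scheme, not interface: my reading (an assumption; the loop bodies fall outside these hunks) is that they use a second-order Horner scheme that advances two independent accumulators by x*x, halving the length of the serial dependency chain on pipelined FPUs. A sketch of the idea for a five-coefficient polynomial:

// Second-order Horner: split P into even and odd parts, P(x) = E(x^2) + x*O(x^2),
// and run the two Horner chains independently. Sketch only, not the library source.
template <class T, class V>
inline V poly5_second_order(const T* a, const V& x)
{
   const V x2 = x * x;
   V even = a[4] * x2 + a[2];   // builds a[4]*x^4 + a[2]*x^2 + a[0]
   even = even * x2 + a[0];
   V odd = a[3] * x2 + a[1];    // builds (a[3]*x^3 + a[1]*x) / x
   return even + odd * x;
}

int main()
{
   const double a[5] = {1.0, 1.0, 1.0, 1.0, 1.0};
   // 1 + x + x^2 + x^3 + x^4 at x = 2 is 31
   return poly5_second_order(a, 2.0) == 31.0 ? 0 : 1;
}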
diff --git a/include/boost/math/tools/detail/rational_horner2_10.hpp b/include/boost/math/tools/detail/rational_horner2_10.hpp
index 1a59aa334c..4d74a714d5 100644
--- a/include/boost/math/tools/detail/rational_horner2_10.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_10.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner2_11.hpp b/include/boost/math/tools/detail/rational_horner2_11.hpp
index 1333a40bc8..15f1cf2556 100644
--- a/include/boost/math/tools/detail/rational_horner2_11.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_11.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner2_12.hpp b/include/boost/math/tools/detail/rational_horner2_12.hpp
index a37cf5a05f..24e9d9e7f7 100644
--- a/include/boost/math/tools/detail/rational_horner2_12.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_12.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
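For reference while reading these signatures, the two tokens the patch introduces are defined centrally, not in this diff. The sketch below reflects my assumption of how they are wired up (the authoritative definitions live in the Boost.Math config and type-traits headers):

// Assumed shape of the markup macro: device+host linkage under GPU builds,
// a no-op otherwise.
#if defined(BOOST_MATH_ENABLE_CUDA)
#  define BOOST_MATH_GPU_ENABLED __host__ __device__
#else
#  define BOOST_MATH_GPU_ENABLED
#endif

// Assumed shape of the tag type: a boost::math-owned mirror of
// std::integral_constant, usable where the std:: one is unavailable
// or undesirable in device code.
namespace boost { namespace math {

template <class T, T v>
struct integral_constant
{
   static constexpr T value = v;
   using value_type = T;
   constexpr operator value_type() const noexcept { return value; }
};

}} // namespace boost::math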
diff --git a/include/boost/math/tools/detail/rational_horner2_13.hpp b/include/boost/math/tools/detail/rational_horner2_13.hpp
index 648f3079c5..495f88525d 100644
--- a/include/boost/math/tools/detail/rational_horner2_13.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_13.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner2_14.hpp b/include/boost/math/tools/detail/rational_horner2_14.hpp
index 7771c3da91..273e723b6c 100644
--- a/include/boost/math/tools/detail/rational_horner2_14.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_14.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -186,7 +186,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
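All of these detail overloads sit behind one public entry point, boost::math::tools::evaluate_rational, which deduces the coefficient count from the array type and forwards to the matching unrolled overload; nothing in this patch changes that interface. Usage sketch (illustrative coefficients only):

#include <boost/math/tools/rational.hpp>
#include <iostream>

int main()
{
   // (1 + 2x + 3x^2) / (4 + 5x + 6x^2)
   static const double num[3] = {1.0, 2.0, 3.0};
   static const double den[3] = {4.0, 5.0, 6.0};
   std::cout << boost::math::tools::evaluate_rational(num, den, 0.5) << '\n';
}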
diff --git a/include/boost/math/tools/detail/rational_horner2_15.hpp b/include/boost/math/tools/detail/rational_horner2_15.hpp
index 03fae0d947..c7e24ec7db 100644
--- a/include/boost/math/tools/detail/rational_horner2_15.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_15.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -186,7 +186,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -202,7 +202,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner2_16.hpp b/include/boost/math/tools/detail/rational_horner2_16.hpp
index d8565e104b..2eebd702bc 100644
--- a/include/boost/math/tools/detail/rational_horner2_16.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_16.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const
boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -186,7 +186,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -202,7 +202,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -218,7 +218,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner2_17.hpp b/include/boost/math/tools/detail/rational_horner2_17.hpp index bd8990e0c2..1fee63047f 100644 --- a/include/boost/math/tools/detail/rational_horner2_17.hpp +++ b/include/boost/math/tools/detail/rational_horner2_17.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) 
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -186,7 +186,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -202,7 +202,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -218,7 +218,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -234,7 +234,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner2_18.hpp 
b/include/boost/math/tools/detail/rational_horner2_18.hpp index 38b99ecf17..7aedbf2aad 100644 --- a/include/boost/math/tools/detail/rational_horner2_18.hpp +++ b/include/boost/math/tools/detail/rational_horner2_18.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& 
x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -186,7 +186,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -202,7 +202,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -218,7 +218,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -234,7 +234,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, 
const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -250,7 +250,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner2_19.hpp b/include/boost/math/tools/detail/rational_horner2_19.hpp index b77d2eb0b9..1c36a267cb 100644 --- a/include/boost/math/tools/detail/rational_horner2_19.hpp +++ b/include/boost/math/tools/detail/rational_horner2_19.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -186,7 +186,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -202,7 +202,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } 
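
[Editorial sketch] Every hunk in these generated headers makes the same two mechanical changes: the overload gains a BOOST_MATH_GPU_ENABLED prefix, and its std::integral_constant tag parameter becomes boost::math::integral_constant, so the compile-time dispatch keeps working on device targets where the C++ standard library is unavailable (NVRTC in particular). The dispatch pattern itself is easiest to see in isolation; the sketch below uses hypothetical names (eval_rational, eval_rational_imp) and only two overloads, not the library's real interface:

#include <cstddef>
#include <type_traits>
#include <iostream>

// One overload per coefficient count; the unused tag-pointer parameter
// exists only to steer overload resolution at compile time.
template <class T, class V>
inline V eval_rational_imp(const T* a, const T* b, const V& x,
                           const std::integral_constant<int, 2>*)
{
   return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
}

template <class T, class V>
inline V eval_rational_imp(const T* a, const T* b, const V& x,
                           const std::integral_constant<int, 3>*)
{
   return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
}

// The array size N selects the fully unrolled implementation above.
template <class T, std::size_t N, class V>
inline V eval_rational(const T (&a)[N], const T (&b)[N], const V& x)
{
   using tag = std::integral_constant<int, static_cast<int>(N)>;
   return eval_rational_imp(a, b, x, static_cast<const tag*>(nullptr));
}

int main()
{
   double num[3] = { 1.0, 2.0, 3.0 }; // 1 + 2x + 3x^2
   double den[3] = { 1.0, 1.0, 1.0 }; // 1 + x  +  x^2
   std::cout << eval_rational(num, den, 0.5) << '\n'; // degree chosen at compile time
}

Passing a null tag pointer rather than a tag object is a long-standing Boost idiom: only the static type of the argument matters, so nothing is constructed or copied at the call site.
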
template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -218,7 +218,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -234,7 +234,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -250,7 +250,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -266,7 +266,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner2_2.hpp b/include/boost/math/tools/detail/rational_horner2_2.hpp index 9c4fe47a74..bb2e2c4dcf 100644 --- a/include/boost/math/tools/detail/rational_horner2_2.hpp +++ b/include/boost/math/tools/detail/rational_horner2_2.hpp @@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } diff --git a/include/boost/math/tools/detail/rational_horner2_20.hpp b/include/boost/math/tools/detail/rational_horner2_20.hpp index 485639dcef..a591b901c9 100644 --- a/include/boost/math/tools/detail/rational_horner2_20.hpp +++ b/include/boost/math/tools/detail/rational_horner2_20.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* 
a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -186,7 +186,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -202,7 +202,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) 
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -218,7 +218,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -234,7 +234,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -250,7 +250,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -266,7 +266,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -282,7 +282,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 20>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 20>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner2_3.hpp b/include/boost/math/tools/detail/rational_horner2_3.hpp
index d19993cce1..0b410d8bbe 100644
--- a/include/boost/math/tools/detail/rational_horner2_3.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_3.hpp
@@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
diff --git a/include/boost/math/tools/detail/rational_horner2_4.hpp b/include/boost/math/tools/detail/rational_horner2_4.hpp
index 847f26dc4e..07a9a2c5ad 100644
--- a/include/boost/math/tools/detail/rational_horner2_4.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_4.hpp
@@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
diff --git a/include/boost/math/tools/detail/rational_horner2_5.hpp b/include/boost/math/tools/detail/rational_horner2_5.hpp
index 8633d5dc13..0933ddfbc4 100644
--- a/include/boost/math/tools/detail/rational_horner2_5.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_5.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
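
[Editorial sketch] The two identifiers these hunks introduce are small utilities. What follows is an assumption-labeled approximation for orientation only, not the library's actual definitions (those live in Boost.Math's config and type-traits headers): BOOST_MATH_GPU_ENABLED presumably expands to __host__ __device__ under a CUDA compiler and to nothing for ordinary host builds, and boost::math::integral_constant mirrors std::integral_constant so the tag dispatch no longer depends on <type_traits> being available:

// Hypothetical approximation of the real definitions -- for illustration only.
#ifdef __CUDACC__
#  define BOOST_MATH_GPU_ENABLED __host__ __device__
#else
#  define BOOST_MATH_GPU_ENABLED
#endif

namespace boost { namespace math {

// Drop-in stand-in for std::integral_constant, usable where the
// C++ standard library is absent (e.g. when compiling under NVRTC).
template <class T, T v>
struct integral_constant
{
   static constexpr T value = v;
   using value_type = T;
   using type       = integral_constant<T, v>;
   constexpr operator value_type() const noexcept { return value; }
   constexpr value_type operator()() const noexcept { return value; }
};

using true_type  = integral_constant<bool, true>;
using false_type = integral_constant<bool, false>;

}} // namespace boost::math
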
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner2_6.hpp b/include/boost/math/tools/detail/rational_horner2_6.hpp index 4555426334..dee9c6e168 100644 --- a/include/boost/math/tools/detail/rational_horner2_6.hpp +++ b/include/boost/math/tools/detail/rational_horner2_6.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner2_7.hpp b/include/boost/math/tools/detail/rational_horner2_7.hpp
index 6a5c704d1c..6f9a85838c 100644
--- a/include/boost/math/tools/detail/rational_horner2_7.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_7.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner2_8.hpp b/include/boost/math/tools/detail/rational_horner2_8.hpp
index 9ec861fc5f..33dda23bba 100644
--- a/include/boost/math/tools/detail/rational_horner2_8.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_8.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner2_9.hpp b/include/boost/math/tools/detail/rational_horner2_9.hpp
index c76755cb22..a9025a8900 100644
--- a/include/boost/math/tools/detail/rational_horner2_9.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_9.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner3_10.hpp b/include/boost/math/tools/detail/rational_horner3_10.hpp
index 773532cd55..b7cec124e2 100644
--- a/include/boost/math/tools/detail/rational_horner3_10.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_10.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner3_11.hpp b/include/boost/math/tools/detail/rational_horner3_11.hpp
index a712fff090..579f0e4868 100644
--- a/include/boost/math/tools/detail/rational_horner3_11.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_11.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner3_12.hpp b/include/boost/math/tools/detail/rational_horner3_12.hpp index 5b87374abf..54300dd08e 100644 --- a/include/boost/math/tools/detail/rational_horner3_12.hpp +++ b/include/boost/math/tools/detail/rational_horner3_12.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner3_13.hpp b/include/boost/math/tools/detail/rational_horner3_13.hpp index 11591668b8..d2fc7b6331 100644 --- a/include/boost/math/tools/detail/rational_horner3_13.hpp +++ b/include/boost/math/tools/detail/rational_horner3_13.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) 
BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner3_14.hpp b/include/boost/math/tools/detail/rational_horner3_14.hpp index 04f31249d4..0b7675f494 100644 --- a/include/boost/math/tools/detail/rational_horner3_14.hpp +++ b/include/boost/math/tools/detail/rational_horner3_14.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const 
U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -672,7 +672,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner3_15.hpp b/include/boost/math/tools/detail/rational_horner3_15.hpp index 4b9cffd48a..8286caed0b 100644 --- a/include/boost/math/tools/detail/rational_horner3_15.hpp +++ b/include/boost/math/tools/detail/rational_horner3_15.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* 
b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -672,7 +672,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -782,7 +782,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner3_16.hpp b/include/boost/math/tools/detail/rational_horner3_16.hpp index 3a384dcc5f..fc823e4162 100644 --- a/include/boost/math/tools/detail/rational_horner3_16.hpp +++ b/include/boost/math/tools/detail/rational_horner3_16.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* 
b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -672,7 +672,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -782,7 +782,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -900,7 +900,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner3_17.hpp b/include/boost/math/tools/detail/rational_horner3_17.hpp index 1c9435e74a..cf7f75a706 100644 --- a/include/boost/math/tools/detail/rational_horner3_17.hpp +++ b/include/boost/math/tools/detail/rational_horner3_17.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* 
b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -672,7 +672,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -782,7 +782,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -900,7 +900,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -1026,7 +1026,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner3_18.hpp b/include/boost/math/tools/detail/rational_horner3_18.hpp
index b133e2bafc..f853ed3e0c 100644
--- a/include/boost/math/tools/detail/rational_horner3_18.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_18.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -672,7 +672,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -782,7 +782,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -900,7 +900,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -1026,7 +1026,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -1160,7 +1160,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner3_19.hpp b/include/boost/math/tools/detail/rational_horner3_19.hpp
index ca35d3b68f..d44e22c90b 100644
--- a/include/boost/math/tools/detail/rational_horner3_19.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_19.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -672,7 +672,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -782,7 +782,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -900,7 +900,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -1026,7 +1026,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -1160,7 +1160,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -1302,7 +1302,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner3_2.hpp b/include/boost/math/tools/detail/rational_horner3_2.hpp
index 9c4fe47a74..bb2e2c4dcf 100644
--- a/include/boost/math/tools/detail/rational_horner3_2.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_2.hpp
@@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
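All of the guarded overloads in these generated headers share one numerical trick: inside [-1, 1] the rational is evaluated directly, while for |x| > 1 both polynomials are re-evaluated in z = 1/x with the coefficient order reversed, so large arguments cannot overflow the partial sums. A minimal sketch of that rearrangement for the two-coefficient case (hypothetical helper name, not part of the patch):

// P(x)/Q(x) with P(x) = a[1]*x + a[0] and Q(x) = b[1]*x + b[0].
// For |x| > 1, divide numerator and denominator by x and evaluate in z = 1/x:
// (a[1]*x + a[0]) / (b[1]*x + b[0]) == (a[0]*z + a[1]) / (b[0]*z + b[1]).
template <class T, class V>
V eval_rational2_sketch(const T (&a)[2], const T (&b)[2], const V& x)
{
   if ((-1 <= x) && (x <= 1))
      return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
   V z = 1 / x;
   return static_cast<V>((a[0] * z + a[1]) / (b[0] * z + b[1]));
}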
diff --git a/include/boost/math/tools/detail/rational_horner3_20.hpp b/include/boost/math/tools/detail/rational_horner3_20.hpp
index 58109ac305..967edf0832 100644
--- a/include/boost/math/tools/detail/rational_horner3_20.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_20.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -672,7 +672,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -782,7 +782,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -900,7 +900,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -1026,7 +1026,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -1160,7 +1160,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -1302,7 +1302,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -1452,7 +1452,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 20>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 20>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
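Every signature in these headers gains the same BOOST_MATH_GPU_ENABLED prefix. The macro's real definition lives in the library's config header and is not part of this diff; on CUDA-style compilers an annotation macro of this kind conventionally reduces to the host/device markers, roughly as in this hedged sketch (GPU_ENABLED_SKETCH is a made-up stand-in, not the library's spelling):

// Sketch only: the real macro also has to cover SYCL and NVRTC builds.
#ifdef __CUDACC__
#  define GPU_ENABLED_SKETCH __host__ __device__
#else
#  define GPU_ENABLED_SKETCH
#endif

// Callable from host code everywhere, and from device code under nvcc.
GPU_ENABLED_SKETCH inline double twice(double x) { return 2 * x; }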
diff --git a/include/boost/math/tools/detail/rational_horner3_3.hpp b/include/boost/math/tools/detail/rational_horner3_3.hpp
index d19993cce1..0b410d8bbe 100644
--- a/include/boost/math/tools/detail/rational_horner3_3.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_3.hpp
@@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
diff --git a/include/boost/math/tools/detail/rational_horner3_4.hpp b/include/boost/math/tools/detail/rational_horner3_4.hpp
index 847f26dc4e..07a9a2c5ad 100644
--- a/include/boost/math/tools/detail/rational_horner3_4.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_4.hpp
@@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
diff --git a/include/boost/math/tools/detail/rational_horner3_5.hpp b/include/boost/math/tools/detail/rational_horner3_5.hpp
index cc77fd560c..62c76dd506 100644
--- a/include/boost/math/tools/detail/rational_horner3_5.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_5.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner3_6.hpp b/include/boost/math/tools/detail/rational_horner3_6.hpp
index 73920ad018..f81a068acb 100644
--- a/include/boost/math/tools/detail/rational_horner3_6.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_6.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner3_7.hpp b/include/boost/math/tools/detail/rational_horner3_7.hpp
index 8e30ecf310..fea457ccf8 100644
--- a/include/boost/math/tools/detail/rational_horner3_7.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_7.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner3_8.hpp b/include/boost/math/tools/detail/rational_horner3_8.hpp
index a8f93f3a3e..306e2a41d9 100644
--- a/include/boost/math/tools/detail/rational_horner3_8.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_8.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner3_9.hpp b/include/boost/math/tools/detail/rational_horner3_9.hpp
index 064d984d3f..93a3527c18 100644
--- a/include/boost/math/tools/detail/rational_horner3_9.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_9.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
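That closes out the regenerated rational_horner3_* headers. They all implement the same compile-time dispatch: the coefficient count is carried in an integral_constant tag, so the caller resolves to a fully unrolled overload with no runtime loop for a GPU compiler to trip over. A stripped-down sketch of the mechanism (hypothetical names, std:: types for brevity, only the N = 2 and N = 3 cases provided):

#include <iostream>
#include <type_traits>

template <class T, class V>
V eval_imp(const T* a, const T* b, const V& x, const std::integral_constant<int, 2>*)
{
   return (a[1] * x + a[0]) / (b[1] * x + b[0]);   // unrolled, no loop
}

template <class T, class V>
V eval_imp(const T* a, const T* b, const V& x, const std::integral_constant<int, 3>*)
{
   return ((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]);
}

template <class T, class V, int N>
V eval_rational(const T (&a)[N], const T (&b)[N], const V& x)
{
   // the tag pointer is never dereferenced; it exists only to pick the overload
   return eval_imp(a, b, x, static_cast<std::integral_constant<int, N>*>(nullptr));
}

int main()
{
   double a[] = {1.0, 2.0}, b[] = {1.0, 1.0};
   std::cout << eval_rational(a, b, 0.5) << '\n';   // (2*0.5 + 1)/(0.5 + 1) = 4/3
}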
diff --git a/include/boost/math/tools/fraction.hpp b/include/boost/math/tools/fraction.hpp
index a64c070258..f36d024c40 100644
--- a/include/boost/math/tools/fraction.hpp
+++ b/include/boost/math/tools/fraction.hpp
@@ -1,4 +1,5 @@
 //  (C) Copyright John Maddock 2005-2006.
+//  (C) Copyright Matt Borland 2024.
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -10,11 +11,13 @@
 #pragma once
 #endif
 
+#include
+#include
+#include
+#include
 #include
 #include
-#include
-#include
-#include
+#include
 
 namespace boost{ namespace math{ namespace tools{
 
@@ -22,10 +25,10 @@ namespace boost{ namespace math{ namespace tools{
 namespace detail
 {
    template <typename T>
-   struct is_pair : public std::false_type{};
+   struct is_pair : public boost::math::false_type{};
 
    template <typename T, typename U>
-   struct is_pair<std::pair<T, U>> : public std::true_type{};
+   struct is_pair<std::pair<T, U>> : public boost::math::true_type{};
 
    template <typename Gen>
    struct fraction_traits_simple
@@ -33,11 +36,11 @@ namespace detail
       using result_type = typename Gen::result_type;
       using value_type = typename Gen::result_type;
 
-      static result_type a(const value_type&) BOOST_MATH_NOEXCEPT(value_type)
+      BOOST_MATH_GPU_ENABLED static result_type a(const value_type&) BOOST_MATH_NOEXCEPT(value_type)
       {
         return 1;
       }
-      static result_type b(const value_type& v) BOOST_MATH_NOEXCEPT(value_type)
+      BOOST_MATH_GPU_ENABLED static result_type b(const value_type& v) BOOST_MATH_NOEXCEPT(value_type)
       {
         return v;
       }
@@ -49,11 +52,11 @@ namespace detail
       using value_type = typename Gen::result_type;
       using result_type = typename value_type::first_type;
 
-      static result_type a(const value_type& v) BOOST_MATH_NOEXCEPT(value_type)
+      BOOST_MATH_GPU_ENABLED static result_type a(const value_type& v) BOOST_MATH_NOEXCEPT(value_type)
      {
         return v.first;
      }
-      static result_type b(const value_type& v) BOOST_MATH_NOEXCEPT(value_type)
+      BOOST_MATH_GPU_ENABLED static result_type b(const value_type& v) BOOST_MATH_NOEXCEPT(value_type)
      {
         return v.second;
      }
@@ -61,7 +64,7 @@ namespace detail
 
    template <typename Gen>
    struct fraction_traits
-       : public std::conditional<
+       : public boost::math::conditional<
         is_pair<typename Gen::result_type>::value,
         fraction_traits_pair<Gen>,
         fraction_traits_simple<Gen>>::type
@@ -74,7 +77,7 @@ namespace detail
       // For float, double, and long double, 1/min_value() is finite.
      // But for mpfr_float and cpp_bin_float, 1/min_value() is inf.
      // Multiply the min by 16 so that the reciprocal doesn't overflow.
-      static T get() {
+      BOOST_MATH_GPU_ENABLED static T get() {
         return 16*tools::min_value<T>();
      }
    };
@@ -82,13 +85,15 @@ struct tiny_value
    {
      using value_type = typename T::value_type;
-      static T get() {
+      BOOST_MATH_GPU_ENABLED static T get() {
        return 16*tools::min_value();
      }
    };
 } // namespace detail
 
+namespace detail {
+
 //
 // continued_fraction_b
 // Evaluates:
@@ -103,9 +108,15 @@
 //
 // Note that the first a0 returned by generator Gen is discarded.
 //
+
 template <class Gen, class U>
-inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(Gen& g, const U& factor, std::uintmax_t& max_terms)
-   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type) && noexcept(std::declval<Gen>()()))
+BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b_impl(Gen& g, const U& factor, boost::math::uintmax_t& max_terms)
+   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   // SYCL can not handle this condition so we only check float on that platform
+   && noexcept(std::declval<Gen>()())
+   #endif
+   )
 {
    BOOST_MATH_STD_USING // ADL of std names
 
@@ -129,7 +140,7 @@ inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(G
    C = f;
    D = 0;
 
-   std::uintmax_t counter(max_terms);
+   boost::math::uintmax_t counter(max_terms);
    do{
       v = g();
       D = traits::b(v) + traits::a(v) * D;
@@ -148,17 +159,38 @@ inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(G
    return f;
 }
 
+} // namespace detail
+
+template <class Gen, class U>
+BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(Gen& g, const U& factor, boost::math::uintmax_t& max_terms)
+   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<Gen>()())
+   #endif
+   )
+{
+   return detail::continued_fraction_b_impl(g, factor, max_terms);
+}
+
 template <class Gen, class U>
-inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(Gen& g, const U& factor)
-   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type) && noexcept(std::declval<Gen>()()))
+BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(Gen& g, const U& factor)
+   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<Gen>()())
+   #endif
+   )
 {
-   std::uintmax_t max_terms = (std::numeric_limits<std::uintmax_t>::max)();
-   return continued_fraction_b(g, factor, max_terms);
+   boost::math::uintmax_t max_terms = (boost::math::numeric_limits<boost::math::uintmax_t>::max)();
+   return detail::continued_fraction_b_impl(g, factor, max_terms);
 }
 
 template <class Gen>
-inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(Gen& g, int bits)
-   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type) && noexcept(std::declval<Gen>()()))
+BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(Gen& g, int bits)
+   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<Gen>()())
+   #endif
+   )
 {
    BOOST_MATH_STD_USING // ADL of std names
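The _impl the new public wrappers forward to is the modified Lentz recurrence; the hunks above only show where its counter type changed, so here is a compact standalone sketch of the underlying algorithm (simplified: double only, fixed tiny-value guard, pair-returning generator; lentz_b_sketch is a made-up name):

#include <cmath>
#include <utility>

// g() yields {a_k, b_k}; the first call's b is used as b0 and its a discarded,
// mirroring continued_fraction_b:  b0 + a1/(b1 + a2/(b2 + ...))
template <class Gen>
double lentz_b_sketch(Gen g, double eps, int max_terms = 1000)
{
   std::pair<double, double> v = g();
   double f = (v.second == 0) ? 1e-30 : v.second;   // tiny guard, like tiny_value::get()
   double C = f, D = 0;
   for (int k = 0; k < max_terms; ++k)
   {
      v = g();
      D = v.second + v.first * D;
      if (D == 0) D = 1e-30;
      C = v.second + v.first / C;
      if (C == 0) C = 1e-30;
      D = 1 / D;
      double delta = C * D;
      f *= delta;
      if (std::fabs(delta - 1) < eps)
         break;
   }
   return f;
}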
@@ -166,13 +198,17 @@ inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(G
    using result_type = typename traits::result_type;
 
    result_type factor = ldexp(1.0f, 1 - bits); // 1 / pow(result_type(2), bits);
-   std::uintmax_t max_terms = (std::numeric_limits<std::uintmax_t>::max)();
-   return continued_fraction_b(g, factor, max_terms);
+   boost::math::uintmax_t max_terms = (boost::math::numeric_limits<boost::math::uintmax_t>::max)();
+   return detail::continued_fraction_b_impl(g, factor, max_terms);
 }
 
 template <class Gen>
-inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(Gen& g, int bits, std::uintmax_t& max_terms)
-   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type) && noexcept(std::declval<Gen>()()))
+BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(Gen& g, int bits, boost::math::uintmax_t& max_terms)
+   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<Gen>()())
+   #endif
+   )
 {
    BOOST_MATH_STD_USING // ADL of std names
 
@@ -180,9 +216,11 @@ inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(G
    using result_type = typename traits::result_type;
 
    result_type factor = ldexp(1.0f, 1 - bits); // 1 / pow(result_type(2), bits);
-   return continued_fraction_b(g, factor, max_terms);
+   return detail::continued_fraction_b_impl(g, factor, max_terms);
 }
 
+namespace detail {
+
 //
 // continued_fraction_a
 // Evaluates:
@@ -198,8 +236,12 @@ inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(G
 // Note that the first a1 and b1 returned by generator Gen are both used.
 //
 template <class Gen, class U>
-inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(Gen& g, const U& factor, std::uintmax_t& max_terms)
-   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type) && noexcept(std::declval<Gen>()()))
+BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a_impl(Gen& g, const U& factor, boost::math::uintmax_t& max_terms)
+   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<Gen>()())
+   #endif
+   )
 {
    BOOST_MATH_STD_USING // ADL of std names
 
@@ -224,7 +266,7 @@ inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(G
    C = f;
    D = 0;
 
-   std::uintmax_t counter(max_terms);
+   boost::math::uintmax_t counter(max_terms);
 
    do{
       v = g();
@@ -244,17 +286,38 @@ inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(G
    return a0/f;
 }
 
+} // namespace detail
+
+template <class Gen, class U>
+BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(Gen& g, const U& factor, boost::math::uintmax_t& max_terms)
+   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<Gen>()())
+   #endif
+   )
+{
+   return detail::continued_fraction_a_impl(g, factor, max_terms);
+}
+
 template <class Gen, class U>
-inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(Gen& g, const U& factor)
-   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type) && noexcept(std::declval<Gen>()()))
+BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(Gen& g, const U& factor)
+   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<Gen>()())
+   #endif
+   )
 {
-   std::uintmax_t max_iter = (std::numeric_limits<std::uintmax_t>::max)();
-   return continued_fraction_a(g, factor, max_iter);
+   boost::math::uintmax_t max_iter = (boost::math::numeric_limits<boost::math::uintmax_t>::max)();
+   return detail::continued_fraction_a_impl(g, factor, max_iter);
 }
 
 template <class Gen>
-inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(Gen& g, int bits)
-   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type) && noexcept(std::declval<Gen>()()))
+BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(Gen& g, int bits)
+   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<Gen>()())
+   #endif
+   )
 {
    BOOST_MATH_STD_USING // ADL of std names
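Because the _a form, as the comment above says, uses the first a1 and b1 it is handed, the simplest possible generator, one that returns 1 forever, evaluates 1/(1 + 1/(1 + ...)), which is the golden ratio minus one. A host-side usage sketch against the public API (all_ones is a made-up name):

#include <boost/math/tools/fraction.hpp>
#include <iostream>
#include <limits>

struct all_ones
{
   using result_type = double;
   double operator()() { return 1.0; }   // every a_k and b_k is 1
};

int main()
{
   all_ones gen;
   double frac = boost::math::tools::continued_fraction_a(
       gen, std::numeric_limits<double>::digits);   // the "int bits" overload
   std::cout << 1 + frac << '\n';                   // golden ratio, ~1.6180339887
}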
@@ -262,14 +325,18 @@ inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(G
    typedef typename traits::result_type result_type;
 
    result_type factor = ldexp(1.0f, 1-bits); // 1 / pow(result_type(2), bits);
-   std::uintmax_t max_iter = (std::numeric_limits<std::uintmax_t>::max)();
+   boost::math::uintmax_t max_iter = (boost::math::numeric_limits<boost::math::uintmax_t>::max)();
 
-   return continued_fraction_a(g, factor, max_iter);
+   return detail::continued_fraction_a_impl(g, factor, max_iter);
 }
 
 template <class Gen>
-inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(Gen& g, int bits, std::uintmax_t& max_terms)
-   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type) && noexcept(std::declval<Gen>()()))
+BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(Gen& g, int bits, boost::math::uintmax_t& max_terms)
+   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<Gen>()())
+   #endif
+   )
 {
    BOOST_MATH_STD_USING // ADL of std names
 
@@ -277,7 +344,7 @@ inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(G
    using result_type = typename traits::result_type;
 
    result_type factor = ldexp(1.0f, 1-bits); // 1 / pow(result_type(2), bits);
-   return continued_fraction_a(g, factor, max_terms);
+   return detail::continued_fraction_a_impl(g, factor, max_terms);
 }
 
 } // namespace tools
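That completes fraction.hpp. Host-side usage is unchanged by the refactor; the sketch below, a close relative of the library documentation's classic tan(x) fraction, exercises the fraction_traits_pair path shown earlier (tan_sketch is a made-up wrapper name):

#include <boost/math/tools/fraction.hpp>
#include <cmath>
#include <iostream>
#include <limits>
#include <utility>

// tan(x) = x / (1 - x^2/(3 - x^2/(5 - ...)))
template <class T>
struct tan_fraction
{
   using result_type = std::pair<T, T>;
   T a, b;
   explicit tan_fraction(T v) : a(-v * v), b(-1) {}
   result_type operator()() { b += 2; return std::make_pair(a, b); }
};

template <class T>
T tan_sketch(T x)
{
   tan_fraction<T> fract(x);
   return x / boost::math::tools::continued_fraction_b(
                  fract, std::numeric_limits<T>::epsilon());
}

int main()
{
   std::cout << tan_sketch(1.0) << " vs " << std::tan(1.0) << '\n';
}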
diff --git a/include/boost/math/tools/is_detected.hpp b/include/boost/math/tools/is_detected.hpp
index 8dfe86b740..93fa96f60b 100644
--- a/include/boost/math/tools/is_detected.hpp
+++ b/include/boost/math/tools/is_detected.hpp
@@ -8,7 +8,7 @@
 #ifndef BOOST_MATH_TOOLS_IS_DETECTED_HPP
 #define BOOST_MATH_TOOLS_IS_DETECTED_HPP
 
-#include
+#include
 
 namespace boost { namespace math { namespace tools {
 
@@ -20,14 +20,14 @@ namespace detail {
 template <typename Default, typename AlwaysVoid, template <typename...> class Op, typename... Args>
 struct detector
 {
-   using value_t = std::false_type;
+   using value_t = boost::math::false_type;
    using type = Default;
 };
 
 template <typename Default, template <typename...> class Op, typename... Args>
 struct detector<Default, void_t<Op<Args...>>, Op, Args...>
 {
-   using value_t = std::true_type;
+   using value_t = boost::math::true_type;
    using type = Op<Args...>;
 };
 
diff --git a/include/boost/math/tools/minima.hpp b/include/boost/math/tools/minima.hpp
index 6070fc5307..a6be94cb2b 100644
--- a/include/boost/math/tools/minima.hpp
+++ b/include/boost/math/tools/minima.hpp
@@ -11,20 +11,26 @@
 #pragma once
 #endif
 
-#include
-#include
-#include
+#include
+#include
+#include
+#include
 #include
+#include
 #include
 
 namespace boost{ namespace math{ namespace tools{
 
 template <class F, class T>
-std::pair<T, T> brent_find_minima(F f, T min, T max, int bits, std::uintmax_t& max_iter)
-   noexcept(BOOST_MATH_IS_FLOAT(T) && noexcept(std::declval<F>()(std::declval<T>())))
+BOOST_MATH_GPU_ENABLED boost::math::pair<T, T> brent_find_minima(F f, T min, T max, int bits, boost::math::uintmax_t& max_iter)
+   noexcept(BOOST_MATH_IS_FLOAT(T)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<F>()(std::declval<T>()))
+   #endif
+   )
 {
    BOOST_MATH_STD_USING
-   bits = (std::min)(policies::digits<T, policies::policy<> >() / 2, bits);
+   bits = (boost::math::min)(policies::digits<T, policies::policy<> >() / 2, bits);
    T tolerance = static_cast<T>(ldexp(1.0, 1-bits));
    T x;  // minima so far
    T w;  // second best point
@@ -42,7 +48,7 @@ std::pair<T, T> brent_find_minima(F f, T min, T max, int bits, std::uintmax_t& m
    fw = fv = fx = f(x);
    delta2 = delta = 0;
 
-   uintmax_t count = max_iter;
+   boost::math::uintmax_t count = max_iter;
 
    do{
       // get midpoint
@@ -134,14 +140,18 @@ std::pair<T, T> brent_find_minima(F f, T min, T max, int bits, std::uintmax_t& m
 
    max_iter -= count;
 
-   return std::make_pair(x, fx);
+   return boost::math::make_pair(x, fx);
 }
 
 template <class F, class T>
-inline std::pair<T, T> brent_find_minima(F f, T min, T max, int digits)
-   noexcept(BOOST_MATH_IS_FLOAT(T) && noexcept(std::declval<F>()(std::declval<T>())))
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<T, T> brent_find_minima(F f, T min, T max, int digits)
+   noexcept(BOOST_MATH_IS_FLOAT(T)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<F>()(std::declval<T>()))
+   #endif
+   )
 {
-   std::uintmax_t m = (std::numeric_limits<std::uintmax_t>::max)();
+   boost::math::uintmax_t m = (boost::math::numeric_limits<boost::math::uintmax_t>::max)();
    return brent_find_minima(f, min, max, digits, m);
 }
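Apart from the pair and uintmax_t spellings, which in host builds can be assumed (hedged) to alias the corresponding std types, brent_find_minima keeps its interface. Typical host-side usage; on return max_iter holds the number of iterations actually consumed:

#include <boost/math/tools/minima.hpp>
#include <cstdint>
#include <iostream>

int main()
{
   auto f = [](double x) { return (x - 2) * (x - 2) + 1; };   // minimum at x = 2
   std::uintmax_t max_iter = 50;
   auto r = boost::math::tools::brent_find_minima(f, 0.0, 4.0, 24, max_iter);
   std::cout << "x = " << r.first << ", f(x) = " << r.second
             << ", iterations = " << max_iter << '\n';
}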
diff --git a/include/boost/math/tools/mp.hpp b/include/boost/math/tools/mp.hpp
index 55aac1b092..560ae8b500 100644
--- a/include/boost/math/tools/mp.hpp
+++ b/include/boost/math/tools/mp.hpp
@@ -11,9 +11,9 @@
 #ifndef BOOST_MATH_TOOLS_MP
 #define BOOST_MATH_TOOLS_MP
 
-#include
-#include
-#include
+#include
+#include
+#include
 
 namespace boost { namespace math { namespace tools { namespace meta_programming {
 
@@ -23,12 +23,12 @@ namespace boost { namespace math { namespace tools { namespace meta_programming {
 template <typename... T>
 struct mp_list {};
 
 // Size_t
-template <std::size_t N>
-using mp_size_t = std::integral_constant<std::size_t, N>;
+template <boost::math::size_t N>
+using mp_size_t = boost::math::integral_constant<boost::math::size_t, N>;
 
 // Boolean
 template <bool B>
-using mp_bool = std::integral_constant<bool, B>;
+using mp_bool = boost::math::integral_constant<bool, B>;
 
 // Identity
 template <typename T>
@@ -53,7 +53,7 @@ struct mp_size_impl {};
 template <template <typename...> class L, typename... T> // Template template parameter must use class
 struct mp_size_impl<L<T...>>
 {
-   using type = std::integral_constant<std::size_t, sizeof...(T)>;
+   using type = boost::math::integral_constant<boost::math::size_t, sizeof...(T)>;
 };
 
 }
 
@@ -79,7 +79,7 @@ namespace detail {
 // At
 // TODO - Use tree based lookup for larger typelists
 // http://odinthenerd.blogspot.com/2017/04/tree-based-lookup-why-kvasirmpl-is.html
-template <typename L, std::size_t>
+template <typename L, boost::math::size_t>
 struct mp_at_c {};
 
 template <template <typename...> class L, typename T0, typename... T>
@@ -168,7 +168,7 @@ struct mp_at_c
 };
 
 }
 
-template <typename L, std::size_t Index>
+template <typename L, boost::math::size_t Index>
 using mp_at_c = typename detail::mp_at_c<L, Index>::type;
 
 template
@@ -336,25 +336,11 @@ using mp_remove_if = typename detail::mp_remove_if_impl<L, P>::type;
 template <typename L, template <typename...> class P>
 using mp_remove_if_q = mp_remove_if<L, P>;
 
-// Index sequence
-// Use C++14 index sequence if available
-#if defined(__cpp_lib_integer_sequence) && (__cpp_lib_integer_sequence >= 201304)
-template <std::size_t... I>
-using index_sequence = std::index_sequence<I...>;
-
-template <std::size_t N>
-using make_index_sequence = std::make_index_sequence<N>;
-
-template <typename... T>
-using index_sequence_for = std::index_sequence_for<T...>;
-
-#else
-
 template <typename T, T... I>
 struct integer_sequence {};
 
-template <std::size_t... I>
-using index_sequence = integer_sequence<std::size_t, I...>;
+template <boost::math::size_t... I>
+using index_sequence = integer_sequence<boost::math::size_t, I...>;
 
 namespace detail {
 
@@ -426,13 +412,11 @@ struct make_integer_sequence_impl
 
 template <typename T, T N>
 using make_integer_sequence = typename detail::make_integer_sequence_impl<T, N>::type;
 
-template <std::size_t N>
-using make_index_sequence = make_integer_sequence<std::size_t, N>;
+template <boost::math::size_t N>
+using make_index_sequence = make_integer_sequence<boost::math::size_t, N>;
 
 template <typename... T>
-using index_sequence_for = make_integer_sequence<std::size_t, sizeof...(T)>;
-
-#endif
+using index_sequence_for = make_integer_sequence<boost::math::size_t, sizeof...(T)>;
 
 }}}} // namespaces
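With the removed #if block, the library now always supplies its own integer_sequence rather than conditionally aliasing the C++14 std one; behaviour is a drop-in match. A small sketch of the pack expansion it drives (assuming, as elsewhere in this patch, that boost::math::size_t stands in for std::size_t; iota_array is a made-up name):

#include <boost/math/tools/mp.hpp>
#include <array>
#include <cstddef>
#include <iostream>

namespace mp = boost::math::tools::meta_programming;

template <std::size_t... I>
std::array<std::size_t, sizeof...(I)> iota_array(mp::index_sequence<I...>)
{
   return {{ I... }};   // expands to {0, 1, 2, ...}
}

int main()
{
   for (std::size_t v : iota_array(mp::make_index_sequence<5>{}))
      std::cout << v << ' ';   // prints: 0 1 2 3 4
}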
diff --git a/include/boost/math/tools/numeric_limits.hpp b/include/boost/math/tools/numeric_limits.hpp
new file mode 100644
index 0000000000..87a7802363
--- /dev/null
+++ b/include/boost/math/tools/numeric_limits.hpp
@@ -0,0 +1,888 @@
+// Copyright (c) 2024 Matt Borland
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Regular use of std::numeric_limits functions can not be used on
+// GPU platforms like CUDA since they are missing the __device__ marker
+// and libcu++ does not provide something analogous.
+// Rather than using giant if else blocks make our own version of numeric limits
+//
+// On the CUDA NVRTC platform we use a best attempt at emulating the functions
+// and values since we do not have any macros to go off of.
+// Use the values as found on GCC 11.4 RHEL 9.4 x64
+
+#ifndef BOOST_MATH_TOOLS_NUMERIC_LIMITS_HPP
+#define BOOST_MATH_TOOLS_NUMERIC_LIMITS_HPP
+
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_HAS_NVRTC
+
+#include <limits>
+#include <climits>
+#include <cfloat>
+#include <cmath>
+
+#endif
+
+namespace boost {
+namespace math {
+
+template <typename T>
+struct numeric_limits
+#ifndef BOOST_MATH_HAS_NVRTC
+: public std::numeric_limits<T> {};
+#else
+{};
+#endif
+
+#if defined(BOOST_MATH_HAS_GPU_SUPPORT) && !defined(BOOST_MATH_HAS_NVRTC)
+
+template <>
+struct numeric_limits<float>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<float>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<float>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<float>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<float>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<float>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<float>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<float>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<float>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<float>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<float>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<float>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<float>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<float>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<float>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<float>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<float>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<float>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<float>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<float>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<float>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<float>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float (min) () { return FLT_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float (max) () { return FLT_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float lowest () { return -FLT_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float epsilon () { return FLT_EPSILON; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float round_error () { return 0.5F; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float infinity () { return static_cast<float>(INFINITY); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float quiet_NaN () { return static_cast<float>(NAN); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float signaling_NaN ()
+    {
+        #ifdef FLT_SNAN
+        return FLT_SNAN;
+        #else
+        return static_cast<float>(NAN);
+        #endif
+    }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float denorm_min () { return FLT_TRUE_MIN; }
+};
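Reviewer note (not from the PR): the point of these specializations is that every member function carries BOOST_MATH_GPU_ENABLED, so the same expression is legal in host and device code. A sketch of the intended call pattern; clamp_to_finite is a hypothetical helper, not part of the header:

#include <boost/math/tools/numeric_limits.hpp>

template <typename T>
BOOST_MATH_GPU_ENABLED T clamp_to_finite(T x)
{
    // (max)() and (min)() are parenthesized throughout, dodging min/max macros
    if (x > (boost::math::numeric_limits<T>::max)())
        return (boost::math::numeric_limits<T>::max)();
    if (x < boost::math::numeric_limits<T>::lowest())
        return boost::math::numeric_limits<T>::lowest();
    return x;
}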
+
+template <>
+struct numeric_limits<double>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<double>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<double>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<double>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<double>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<double>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<double>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<double>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<double>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<double>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<double>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<double>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<double>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<double>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<double>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<double>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<double>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<double>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<double>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<double>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<double>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<double>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double (min) () { return DBL_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double (max) () { return DBL_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double lowest () { return -DBL_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double epsilon () { return DBL_EPSILON; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double round_error () { return 0.5; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double infinity () { return static_cast<double>(INFINITY); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double quiet_NaN () { return static_cast<double>(NAN); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double signaling_NaN ()
+    {
+        #ifdef DBL_SNAN
+        return DBL_SNAN;
+        #else
+        return static_cast<double>(NAN);
+        #endif
+    }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double denorm_min () { return DBL_TRUE_MIN; }
+};
+
+template <>
+struct numeric_limits<short>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<short>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<short>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<short>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<short>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<short>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<short>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<short>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<short>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<short>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<short>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<short>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<short>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<short>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<short>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<short>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<short>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<short>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<short>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<short>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<short>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<short>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short (min) () { return SHRT_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short (max) () { return SHRT_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short lowest () { return SHRT_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned short>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<unsigned short>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<unsigned short>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<unsigned short>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<unsigned short>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<unsigned short>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<unsigned short>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<unsigned short>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<unsigned short>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<unsigned short>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<unsigned short>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<unsigned short>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<unsigned short>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<unsigned short>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<unsigned short>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<unsigned short>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<unsigned short>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<unsigned short>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<unsigned short>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<unsigned short>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<unsigned short>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<unsigned short>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short (min) () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short (max) () { return USHRT_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short lowest () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<int>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<int>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<int>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<int>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<int>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<int>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<int>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<int>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<int>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<int>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<int>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<int>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<int>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<int>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<int>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<int>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<int>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<int>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<int>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<int>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<int>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<int>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int (min) () { return INT_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int (max) () { return INT_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int lowest () { return INT_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned int>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<unsigned int>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<unsigned int>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<unsigned int>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<unsigned int>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<unsigned int>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<unsigned int>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<unsigned int>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<unsigned int>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<unsigned int>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<unsigned int>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<unsigned int>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<unsigned int>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<unsigned int>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<unsigned int>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<unsigned int>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<unsigned int>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<unsigned int>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<unsigned int>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<unsigned int>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<unsigned int>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<unsigned int>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int (min) () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int (max) () { return UINT_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int lowest () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<long>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<long>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<long>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<long>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<long>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<long>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<long>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<long>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<long>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<long>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<long>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<long>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<long>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<long>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<long>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<long>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<long>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<long>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<long>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<long>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<long>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<long>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long (min) () { return LONG_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long (max) () { return LONG_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long lowest () { return LONG_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned long>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<unsigned long>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<unsigned long>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<unsigned long>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<unsigned long>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<unsigned long>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<unsigned long>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<unsigned long>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<unsigned long>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<unsigned long>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<unsigned long>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<unsigned long>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<unsigned long>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<unsigned long>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<unsigned long>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<unsigned long>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<unsigned long>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<unsigned long>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<unsigned long>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<unsigned long>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<unsigned long>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<unsigned long>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long (min) () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long (max) () { return ULONG_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long lowest () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<long long>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<long long>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<long long>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<long long>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<long long>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<long long>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<long long>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<long long>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<long long>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<long long>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<long long>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<long long>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<long long>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<long long>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<long long>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<long long>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<long long>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<long long>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<long long>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<long long>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<long long>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<long long>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long (min) () { return LLONG_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long (max) () { return LLONG_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long lowest () { return LLONG_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned long long>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<unsigned long long>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<unsigned long long>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<unsigned long long>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<unsigned long long>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<unsigned long long>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<unsigned long long>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<unsigned long long>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<unsigned long long>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<unsigned long long>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<unsigned long long>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<unsigned long long>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<unsigned long long>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<unsigned long long>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<unsigned long long>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<unsigned long long>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<unsigned long long>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<unsigned long long>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<unsigned long long>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<unsigned long long>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<unsigned long long>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<unsigned long long>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long (min) () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long (max) () { return ULLONG_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long lowest () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<bool>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<bool>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<bool>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<bool>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<bool>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<bool>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<bool>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<bool>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<bool>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<bool>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<bool>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<bool>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<bool>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<bool>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<bool>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<bool>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<bool>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<bool>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<bool>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<bool>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<bool>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<bool>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool (min) () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool (max) () { return true; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool lowest () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool epsilon () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool round_error () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool infinity () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool quiet_NaN () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool signaling_NaN () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool denorm_min () { return false; }
+};
+
+#elif defined(BOOST_MATH_HAS_NVRTC) // Pure NVRTC support - Removes rounding style and approximates the traits
+
+template <>
+struct numeric_limits<float>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = true;
+    BOOST_MATH_STATIC constexpr bool is_integer = false;
+    BOOST_MATH_STATIC constexpr bool is_exact = false;
+    BOOST_MATH_STATIC constexpr bool has_infinity = true;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = true;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = true;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = true;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = false;
+    BOOST_MATH_STATIC constexpr int digits = 24;
+    BOOST_MATH_STATIC constexpr int digits10 = 6;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 9;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = -125;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = -37;
+    BOOST_MATH_STATIC constexpr int max_exponent = 128;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 38;
+    BOOST_MATH_STATIC constexpr bool traps = false;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float (min) () { return 1.17549435e-38F; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float (max) () { return 3.40282347e+38F; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float lowest () { return -3.40282347e+38F; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float epsilon () { return 1.1920929e-07; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float round_error () { return 0.5F; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float infinity () { return __int_as_float(0x7f800000); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float quiet_NaN () { return __int_as_float(0x7fc00000); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float signaling_NaN () { return __int_as_float(0x7fa00000); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float denorm_min () { return 1.4013e-45F; }
+};
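Reviewer note (not from the PR): the hex constants above are the standard IEEE-754 single-precision bit patterns, decoded through CUDA's __int_as_float intrinsic because NVRTC provides no INFINITY/NAN macros. A host-side check of what they encode, using a memcpy bit-cast (a hypothetical test, not something the header does):

#include <cmath>
#include <cstdint>
#include <cstring>

static float bits_to_float(std::uint32_t u)
{
    float f;
    std::memcpy(&f, &u, sizeof f); // well-defined bit reinterpretation
    return f;
}

int main()
{
    // 0x7f800000: sign 0, exponent all-ones, mantissa 0     -> +infinity
    // 0x7fc00000: exponent all-ones, quiet bit set          -> quiet NaN
    // 0x7fa00000: exponent all-ones, quiet bit clear,
    //             non-zero payload                          -> signaling NaN
    return (std::isinf(bits_to_float(0x7f800000u)) &&
            std::isnan(bits_to_float(0x7fc00000u)) &&
            std::isnan(bits_to_float(0x7fa00000u))) ? 0 : 1;
}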
+
+template <>
+struct numeric_limits<double>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = true;
+    BOOST_MATH_STATIC constexpr bool is_integer = false;
+    BOOST_MATH_STATIC constexpr bool is_exact = false;
+    BOOST_MATH_STATIC constexpr bool has_infinity = true;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = true;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = true;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = true;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = false;
+    BOOST_MATH_STATIC constexpr int digits = 53;
+    BOOST_MATH_STATIC constexpr int digits10 = 15;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 21;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = -1021;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = -307;
+    BOOST_MATH_STATIC constexpr int max_exponent = 1024;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 308;
+    BOOST_MATH_STATIC constexpr bool traps = false;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double (min) () { return 2.2250738585072014e-308; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double (max) () { return 1.7976931348623157e+308; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double lowest () { return -1.7976931348623157e+308; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double epsilon () { return 2.2204460492503131e-16; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double round_error () { return 0.5; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double infinity () { return __longlong_as_double(0x7ff0000000000000ULL); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double quiet_NaN () { return __longlong_as_double(0x7ff8000000000000ULL); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double signaling_NaN () { return __longlong_as_double(0x7ff4000000000000ULL); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double denorm_min () { return 4.9406564584124654e-324; }
+};
+
+template <>
+struct numeric_limits<short>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = true;
+    BOOST_MATH_STATIC constexpr bool is_integer = true;
+    BOOST_MATH_STATIC constexpr bool is_exact = true;
+    BOOST_MATH_STATIC constexpr bool has_infinity = false;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = false;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = false;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = false;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = false;
+    BOOST_MATH_STATIC constexpr int digits = 15;
+    BOOST_MATH_STATIC constexpr int digits10 = 4;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 0;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = 0;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr bool traps = true;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short (min) () { return -32768; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short (max) () { return 32767; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short lowest () { return -32768; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned short>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = false;
+    BOOST_MATH_STATIC constexpr bool is_integer = true;
+    BOOST_MATH_STATIC constexpr bool is_exact = true;
+    BOOST_MATH_STATIC constexpr bool has_infinity = false;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = false;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = false;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = false;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = true;
+    BOOST_MATH_STATIC constexpr int digits = 16;
+    BOOST_MATH_STATIC constexpr int digits10 = 4;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 0;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = 0;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr bool traps = true;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short (min) () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short (max) () { return 65535U; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short lowest () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<int>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = true;
+    BOOST_MATH_STATIC constexpr bool is_integer = true;
+    BOOST_MATH_STATIC constexpr bool is_exact = true;
+    BOOST_MATH_STATIC constexpr bool has_infinity = false;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = false;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = false;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = false;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = false;
+    BOOST_MATH_STATIC constexpr int digits = 31;
+    BOOST_MATH_STATIC constexpr int digits10 = 9;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 0;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = 0;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr bool traps = true;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int (min) () { return -2147483648; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int (max) () { return 2147483647; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int lowest () { return -2147483648; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned int>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = false;
+    BOOST_MATH_STATIC constexpr bool is_integer = true;
+    BOOST_MATH_STATIC constexpr bool is_exact = true;
+    BOOST_MATH_STATIC constexpr bool has_infinity = false;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = false;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = false;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = false;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = true;
+    BOOST_MATH_STATIC constexpr int digits = 32;
+    BOOST_MATH_STATIC constexpr int digits10 = 9;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 0;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = 0;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr bool traps = true;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int (min) () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int (max) () { return 4294967295U; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int lowest () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<long>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = true;
+    BOOST_MATH_STATIC constexpr bool is_integer = true;
+    BOOST_MATH_STATIC constexpr bool is_exact = true;
+    BOOST_MATH_STATIC constexpr bool has_infinity = false;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = false;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = false;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = false;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = false;
+    BOOST_MATH_STATIC constexpr int digits = 63;
+    BOOST_MATH_STATIC constexpr int digits10 = 18;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 0;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = 0;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr bool traps = true;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long (min) () { return -9223372036854775808L; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long (max) () { return 9223372036854775807L; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long lowest () { return -9223372036854775808L; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned long>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = false;
+    BOOST_MATH_STATIC constexpr bool is_integer = true;
+    BOOST_MATH_STATIC constexpr bool is_exact = true;
+    BOOST_MATH_STATIC constexpr bool has_infinity = false;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = false;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = false;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = false;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = true;
+    BOOST_MATH_STATIC constexpr int digits = 64;
+    BOOST_MATH_STATIC constexpr int digits10 = 19;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 0;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = 0;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr bool traps = true;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long (min) () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long (max) () { return 18446744073709551615UL; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long lowest () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<long long>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = true;
+    BOOST_MATH_STATIC constexpr bool is_integer = true;
+    BOOST_MATH_STATIC constexpr bool is_exact = true;
+    BOOST_MATH_STATIC constexpr bool has_infinity = false;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = false;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = false;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = false;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = false;
+    BOOST_MATH_STATIC constexpr int digits = 63;
+    BOOST_MATH_STATIC constexpr int digits10 = 18;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 0;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = 0;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr bool traps = true;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long (min) () { return -9223372036854775808LL; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long (max) () { return 9223372036854775807LL; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long lowest () { return -9223372036854775808LL; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned long long>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = false;
+    BOOST_MATH_STATIC constexpr bool is_integer = true;
+    BOOST_MATH_STATIC constexpr bool is_exact = true;
+    BOOST_MATH_STATIC constexpr bool has_infinity = false;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = false;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = false;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = false;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = true;
+    BOOST_MATH_STATIC constexpr int digits = 64;
+    BOOST_MATH_STATIC constexpr int digits10 = 19;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 0;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = 0;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr bool traps = true;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long (min) () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long (max) () { return 18446744073709551615UL; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long lowest () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<bool>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = false;
+    BOOST_MATH_STATIC constexpr bool is_integer = true;
+    BOOST_MATH_STATIC constexpr bool is_exact = true;
+    BOOST_MATH_STATIC constexpr bool has_infinity = false;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = false;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = false;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = false;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = false;
+    BOOST_MATH_STATIC constexpr int digits = 1;
+    BOOST_MATH_STATIC constexpr int digits10 = 0;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 0;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = 0;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr bool traps = false;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool (min) () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool (max) () { return true; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool lowest () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool epsilon () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool round_error () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool infinity () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool quiet_NaN () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool signaling_NaN () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool denorm_min () { return false; }
+};
+
+#endif // BOOST_MATH_HAS_GPU_SUPPORT
+
+} // namespace math
+} // namespace boost
+
+#endif
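Reviewer note (not from the PR): since the non-NVRTC branches delegate every trait to std::numeric_limits, a host-only parity check like the following (a plausible unit test, not one included in this diff) pins the delegation down:

#include <boost/math/tools/numeric_limits.hpp>
#include <limits>

static_assert(boost::math::numeric_limits<double>::digits == std::numeric_limits<double>::digits,
              "trait delegation must be exact on host");
static_assert(boost::math::numeric_limits<int>::is_integer == std::numeric_limits<int>::is_integer,
              "trait delegation must be exact on host");
static_assert((boost::math::numeric_limits<float>::max)() == (std::numeric_limits<float>::max)(),
              "member functions must return the std values on host");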
diff --git a/include/boost/math/tools/polynomial.hpp b/include/boost/math/tools/polynomial.hpp
index 6f9b9039fd..5d395cdbed 100644
--- a/include/boost/math/tools/polynomial.hpp
+++ b/include/boost/math/tools/polynomial.hpp
@@ -32,7 +32,7 @@ namespace boost{ namespace math{ namespace tools{
 
 template <class T>
-T chebyshev_coefficient(unsigned n, unsigned m)
+BOOST_MATH_GPU_ENABLED T chebyshev_coefficient(unsigned n, unsigned m)
 {
    BOOST_MATH_STD_USING
    if(m > n)
@@ -56,7 +56,7 @@ T chebyshev_coefficient(unsigned n, unsigned m)
 }
 
 template <class Seq>
-Seq polynomial_to_chebyshev(const Seq& s)
+BOOST_MATH_GPU_ENABLED Seq polynomial_to_chebyshev(const Seq& s)
 {
    // Converts a Polynomial into Chebyshev form:
    typedef typename Seq::value_type value_type;
@@ -92,7 +92,7 @@ Seq polynomial_to_chebyshev(const Seq& s)
 }
 
 template <class Seq, class T>
-T evaluate_chebyshev(const Seq& a, const T& x)
+BOOST_MATH_GPU_ENABLED T evaluate_chebyshev(const Seq& a, const T& x)
 {
    // Clenshaw's formula:
    typedef typename Seq::difference_type difference_type;
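Reviewer note (not from the PR): for reference, the Clenshaw recurrence named in the comment above evaluates p(x) = Σ a_k T_k(x) without constructing the Chebyshev polynomials T_k explicitly:

    b_{n+1} = b_{n+2} = 0
    b_k = a_k + 2x b_{k+1} - b_{k+2},   k = n, ..., 1
    p(x) = a_0 + x b_1 - b_2            (a_0/2 + x b_1 - b_2 under the halved-a_0 convention)

Whether the constant term enters as a_0 or a_0/2 depends on the series normalization, which polynomial_to_chebyshev above fixes; the recurrence itself is unchanged by this PR, which only adds the GPU markers.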
@@ -124,7 +124,7 @@ namespace detail {
 * subtlety of distinction.
 */
 template <typename T, typename N>
-typename std::enable_if<!std::numeric_limits<T>::is_integer, void >::type
+BOOST_MATH_GPU_ENABLED typename std::enable_if<!std::numeric_limits<T>::is_integer, void >::type
 division_impl(polynomial<T> &q, polynomial<T> &u, const polynomial<T>& v, N n, N k)
 {
    q[k] = u[n + k] / v[n];
@@ -136,7 +136,7 @@ division_impl(polynomial<T> &q, polynomial<T> &u, const polynomial<T>& v, N n, N
 }
 
 template <class T, class N>
-T integer_power(T t, N n)
+BOOST_MATH_GPU_ENABLED T integer_power(T t, N n)
 {
    switch(n)
    {
@@ -167,7 +167,7 @@ T integer_power(T t, N n)
 * don't currently have that subtlety of distinction.
 */
 template <typename T, typename N>
-typename std::enable_if<std::numeric_limits<T>::is_integer, void >::type
+BOOST_MATH_GPU_ENABLED typename std::enable_if<std::numeric_limits<T>::is_integer, void >::type
 division_impl(polynomial<T> &q, polynomial<T> &u, const polynomial<T>& v, N n, N k)
 {
    q[k] = u[n + k] * integer_power(v[n], k);
@@ -187,7 +187,7 @@ division_impl(polynomial<T> &q, polynomial<T> &u, const polynomial<T>& v, N n, N
 * @param v Divisor.
 */
 template <typename T>
-std::pair< polynomial<T>, polynomial<T> >
+BOOST_MATH_GPU_ENABLED std::pair< polynomial<T>, polynomial<T> >
 division(polynomial<T> u, const polynomial<T>& v)
 {
    BOOST_MATH_ASSERT(v.size() <= u.size());
@@ -218,7 +218,7 @@ division(polynomial<T> u, const polynomial<T>& v)
 struct negate
 {
    template <class T>
-   T operator()(T const &x) const
+   BOOST_MATH_GPU_ENABLED T operator()(T const &x) const
    {
       return -x;
    }
@@ -227,7 +227,7 @@ struct negate
 struct plus
 {
    template <class T, class U>
-   T operator()(T const &x, U const& y) const
+   BOOST_MATH_GPU_ENABLED T operator()(T const &x, U const& y) const
    {
       return x + y;
    }
@@ -236,7 +236,7 @@ struct plus
 struct minus
 {
    template <class T, class U>
-   T operator()(T const &x, U const& y) const
+   BOOST_MATH_GPU_ENABLED T operator()(T const &x, U const& y) const
    {
      return x - y;
    }
@@ -248,13 +248,13 @@ struct minus
 * Returns the zero element for multiplication of polynomials.
 */
 template <class T>
-polynomial<T> zero_element(std::multiplies< polynomial<T> >)
+BOOST_MATH_GPU_ENABLED polynomial<T> zero_element(std::multiplies< polynomial<T> >)
 {
    return polynomial<T>();
 }
 
 template <class T>
-polynomial<T> identity_element(std::multiplies< polynomial<T> >)
+BOOST_MATH_GPU_ENABLED polynomial<T> identity_element(std::multiplies< polynomial<T> >)
 {
    return polynomial<T>(T(1));
 }
@@ -264,7 +264,7 @@ polynomial<T> identity_element(std::multiplies< polynomial<T> >)
 * This function is not defined for division by zero: user beware.
 */
 template <typename T>
-std::pair< polynomial<T>, polynomial<T> >
+BOOST_MATH_GPU_ENABLED std::pair< polynomial<T>, polynomial<T> >
 quotient_remainder(const polynomial<T>& dividend, const polynomial<T>& divisor)
 {
    BOOST_MATH_ASSERT(divisor);
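Reviewer note (not from the PR): quotient_remainder is ordinary Euclidean division of polynomials; a host-side sketch (coefficients are stored lowest degree first):

#include <boost/math/tools/polynomial.hpp>

int main()
{
    using boost::math::tools::polynomial;
    polynomial<double> dividend{{-1.0, 0.0, 1.0}}; // x^2 - 1
    polynomial<double> divisor{{-1.0, 1.0}};       // x - 1
    // found by ADL; returns (quotient, remainder)
    auto qr = quotient_remainder(dividend, divisor);
    // (x^2 - 1) / (x - 1) = x + 1 exactly, so the remainder is the zero polynomial
    return (qr.first == polynomial<double>{{1.0, 1.0}} && qr.second.is_zero()) ? 0 : 1;
}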
boost::math::tools::evaluate_polynomial((m_data).data(), z, m_data.size()) : T(0); } - std::vector chebyshev() const + BOOST_MATH_GPU_ENABLED std::vector chebyshev() const { return polynomial_to_chebyshev(m_data); } - std::vector const& data() const + BOOST_MATH_GPU_ENABLED std::vector const& data() const { return m_data; } - std::vector & data() + BOOST_MATH_GPU_ENABLED std::vector & data() { return m_data; } - polynomial prime() const + BOOST_MATH_GPU_ENABLED polynomial prime() const { #ifdef _MSC_VER // Disable int->float conversion warning: @@ -418,7 +418,7 @@ class polynomial #endif } - polynomial integrate() const + BOOST_MATH_GPU_ENABLED polynomial integrate() const { std::vector i_data(m_data.size() + 1); // Choose integration constant such that P(0) = 0. @@ -431,20 +431,20 @@ class polynomial } // operators: - polynomial& operator =(polynomial&& p) noexcept + BOOST_MATH_GPU_ENABLED polynomial& operator =(polynomial&& p) noexcept { m_data = std::move(p.m_data); return *this; } - polynomial& operator =(const polynomial& p) + BOOST_MATH_GPU_ENABLED polynomial& operator =(const polynomial& p) { m_data = p.m_data; return *this; } template - typename std::enable_if::value, polynomial&>::type operator +=(const U& value) + BOOST_MATH_GPU_ENABLED typename std::enable_if::value, polynomial&>::type operator +=(const U& value) { addition(value); normalize(); @@ -452,7 +452,7 @@ class polynomial } template - typename std::enable_if::value, polynomial&>::type operator -=(const U& value) + BOOST_MATH_GPU_ENABLED typename std::enable_if::value, polynomial&>::type operator -=(const U& value) { subtraction(value); normalize(); @@ -460,7 +460,7 @@ class polynomial } template - typename std::enable_if::value, polynomial&>::type operator *=(const U& value) + BOOST_MATH_GPU_ENABLED typename std::enable_if::value, polynomial&>::type operator *=(const U& value) { multiplication(value); normalize(); @@ -468,7 +468,7 @@ class polynomial } template - typename std::enable_if::value, polynomial&>::type operator /=(const U& value) + BOOST_MATH_GPU_ENABLED typename std::enable_if::value, polynomial&>::type operator /=(const U& value) { division(value); normalize(); @@ -476,7 +476,7 @@ class polynomial } template - typename std::enable_if::value, polynomial&>::type operator %=(const U& /*value*/) + BOOST_MATH_GPU_ENABLED typename std::enable_if::value, polynomial&>::type operator %=(const U& /*value*/) { // We can always divide by a scalar, so there is no remainder: this->set_zero(); @@ -484,7 +484,7 @@ class polynomial } template - polynomial& operator +=(const polynomial& value) + BOOST_MATH_GPU_ENABLED polynomial& operator +=(const polynomial& value) { addition(value); normalize(); @@ -492,7 +492,7 @@ class polynomial } template - polynomial& operator -=(const polynomial& value) + BOOST_MATH_GPU_ENABLED polynomial& operator -=(const polynomial& value) { subtraction(value); normalize(); @@ -500,7 +500,7 @@ class polynomial } template - void multiply(const polynomial& a, const polynomial& b) { + BOOST_MATH_GPU_ENABLED void multiply(const polynomial& a, const polynomial& b) { if (!a || !b) { this->set_zero(); @@ -514,28 +514,28 @@ class polynomial } template - polynomial& operator *=(const polynomial& value) + BOOST_MATH_GPU_ENABLED polynomial& operator *=(const polynomial& value) { this->multiply(*this, value); return *this; } template - polynomial& operator /=(const polynomial& value) + BOOST_MATH_GPU_ENABLED polynomial& operator /=(const polynomial& value) { *this = quotient_remainder(*this, value).first; 
return *this; } template - polynomial& operator %=(const polynomial& value) + BOOST_MATH_GPU_ENABLED polynomial& operator %=(const polynomial& value) { *this = quotient_remainder(*this, value).second; return *this; } template - polynomial& operator >>=(U const &n) + BOOST_MATH_GPU_ENABLED polynomial& operator >>=(U const &n) { BOOST_MATH_ASSERT(n <= m_data.size()); m_data.erase(m_data.begin(), m_data.begin() + n); @@ -543,7 +543,7 @@ class polynomial } template - polynomial& operator <<=(U const &n) + BOOST_MATH_GPU_ENABLED polynomial& operator <<=(U const &n) { m_data.insert(m_data.begin(), n, static_cast(0)); normalize(); @@ -551,33 +551,33 @@ class polynomial } // Convenient and efficient query for zero. - bool is_zero() const + BOOST_MATH_GPU_ENABLED bool is_zero() const { return m_data.empty(); } // Conversion to bool. - inline explicit operator bool() const + BOOST_MATH_GPU_ENABLED inline explicit operator bool() const { return !m_data.empty(); } // Fast way to set a polynomial to zero. - void set_zero() + BOOST_MATH_GPU_ENABLED void set_zero() { m_data.clear(); } /** Remove zero coefficients 'from the top', that is for which there are no * non-zero coefficients of higher degree. */ - void normalize() + BOOST_MATH_GPU_ENABLED void normalize() { m_data.erase(std::find_if(m_data.rbegin(), m_data.rend(), [](const T& x)->bool { return x != T(0); }).base(), m_data.end()); } private: template - polynomial& addition(const U& value, R op) + BOOST_MATH_GPU_ENABLED polynomial& addition(const U& value, R op) { if(m_data.size() == 0) m_data.resize(1, 0); @@ -586,19 +586,19 @@ class polynomial } template - polynomial& addition(const U& value) + BOOST_MATH_GPU_ENABLED polynomial& addition(const U& value) { return addition(value, detail::plus()); } template - polynomial& subtraction(const U& value) + BOOST_MATH_GPU_ENABLED polynomial& subtraction(const U& value) { return addition(value, detail::minus()); } template - polynomial& addition(const polynomial& value, R op) + BOOST_MATH_GPU_ENABLED polynomial& addition(const polynomial& value, R op) { if (m_data.size() < value.size()) m_data.resize(value.size(), 0); @@ -608,26 +608,26 @@ class polynomial } template - polynomial& addition(const polynomial& value) + BOOST_MATH_GPU_ENABLED polynomial& addition(const polynomial& value) { return addition(value, detail::plus()); } template - polynomial& subtraction(const polynomial& value) + BOOST_MATH_GPU_ENABLED polynomial& subtraction(const polynomial& value) { return addition(value, detail::minus()); } template - polynomial& multiplication(const U& value) + BOOST_MATH_GPU_ENABLED polynomial& multiplication(const U& value) { std::transform(m_data.begin(), m_data.end(), m_data.begin(), [&](const T& x)->T { return x * value; }); return *this; } template - polynomial& division(const U& value) + BOOST_MATH_GPU_ENABLED polynomial& division(const U& value) { std::transform(m_data.begin(), m_data.end(), m_data.begin(), [&](const T& x)->T { return x / value; }); return *this; @@ -638,7 +638,7 @@ class polynomial template -inline polynomial operator + (const polynomial& a, const polynomial& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator + (const polynomial& a, const polynomial& b) { polynomial result(a); result += b; @@ -646,26 +646,26 @@ inline polynomial operator + (const polynomial& a, const polynomial& b) } template -inline polynomial operator + (polynomial&& a, const polynomial& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator + (polynomial&& a, const polynomial& b) { a += b; return std::move(a); } 
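The value-returning operators here forward to the compound assignments and to quotient_remainder above. A minimal host-side sketch of how they compose (illustrative only, not part of the patch; it assumes just <boost/math/tools/polynomial.hpp>, with coefficients stored lowest order first):

// Illustrative sketch, not part of this patch.
#include <boost/math/tools/polynomial.hpp>
#include <iostream>

int main()
{
   using boost::math::tools::polynomial;

   polynomial<double> u{2.0, 3.0, 1.0};   // 2 + 3x + x^2 == (1 + x)(2 + x)
   polynomial<double> v{1.0, 1.0};        // 1 + x

   polynomial<double> q = u / v;          // quotient:  2 + x
   polynomial<double> r = u % v;          // remainder: the zero polynomial here

   // operator/ and operator% both forward to quotient_remainder, so
   // u == q * v + r holds for any nonzero divisor v.
   std::cout << q << ' ' << r << '\n';
}

With the BOOST_MATH_GPU_ENABLED annotations applied, the same operations are intended to compile for device code as well; the sketch above exercises only the host path.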
template -inline polynomial operator + (const polynomial& a, polynomial&& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator + (const polynomial& a, polynomial&& b) { b += a; return b; } template -inline polynomial operator + (polynomial&& a, polynomial&& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator + (polynomial&& a, polynomial&& b) { a += b; return a; } template -inline polynomial operator - (const polynomial& a, const polynomial& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator - (const polynomial& a, const polynomial& b) { polynomial result(a); result -= b; @@ -673,26 +673,26 @@ inline polynomial operator - (const polynomial& a, const polynomial& b) } template -inline polynomial operator - (polynomial&& a, const polynomial& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator - (polynomial&& a, const polynomial& b) { a -= b; return a; } template -inline polynomial operator - (const polynomial& a, polynomial&& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator - (const polynomial& a, polynomial&& b) { b -= a; return -b; } template -inline polynomial operator - (polynomial&& a, polynomial&& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator - (polynomial&& a, polynomial&& b) { a -= b; return a; } template -inline polynomial operator * (const polynomial& a, const polynomial& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator * (const polynomial& a, const polynomial& b) { polynomial result; result.multiply(a, b); @@ -700,94 +700,94 @@ inline polynomial operator * (const polynomial& a, const polynomial& b) } template -inline polynomial operator / (const polynomial& a, const polynomial& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator / (const polynomial& a, const polynomial& b) { return quotient_remainder(a, b).first; } template -inline polynomial operator % (const polynomial& a, const polynomial& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator % (const polynomial& a, const polynomial& b) { return quotient_remainder(a, b).second; } template -inline typename std::enable_if::value, polynomial >::type operator + (polynomial a, const U& b) +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, polynomial >::type operator + (polynomial a, const U& b) { a += b; return a; } template -inline typename std::enable_if::value, polynomial >::type operator - (polynomial a, const U& b) +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, polynomial >::type operator - (polynomial a, const U& b) { a -= b; return a; } template -inline typename std::enable_if::value, polynomial >::type operator * (polynomial a, const U& b) +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, polynomial >::type operator * (polynomial a, const U& b) { a *= b; return a; } template -inline typename std::enable_if::value, polynomial >::type operator / (polynomial a, const U& b) +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, polynomial >::type operator / (polynomial a, const U& b) { a /= b; return a; } template -inline typename std::enable_if::value, polynomial >::type operator % (const polynomial&, const U&) +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, polynomial >::type operator % (const polynomial&, const U&) { // Since we can always divide by a scalar, result is always an empty polynomial: return polynomial(); } template -inline typename std::enable_if::value, polynomial >::type operator + (const U& a, polynomial b) +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, polynomial >::type operator + (const U& a, 
polynomial b) { b += a; return b; } template -inline typename std::enable_if::value, polynomial >::type operator - (const U& a, polynomial b) +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, polynomial >::type operator - (const U& a, polynomial b) { b -= a; return -b; } template -inline typename std::enable_if::value, polynomial >::type operator * (const U& a, polynomial b) +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, polynomial >::type operator * (const U& a, polynomial b) { b *= a; return b; } template -bool operator == (const polynomial &a, const polynomial &b) +BOOST_MATH_GPU_ENABLED bool operator == (const polynomial &a, const polynomial &b) { return a.data() == b.data(); } template -bool operator != (const polynomial &a, const polynomial &b) +BOOST_MATH_GPU_ENABLED bool operator != (const polynomial &a, const polynomial &b) { return a.data() != b.data(); } template -polynomial operator >> (polynomial a, const U& b) +BOOST_MATH_GPU_ENABLED polynomial operator >> (polynomial a, const U& b) { a >>= b; return a; } template -polynomial operator << (polynomial a, const U& b) +BOOST_MATH_GPU_ENABLED polynomial operator << (polynomial a, const U& b) { a <<= b; return a; @@ -795,26 +795,26 @@ polynomial operator << (polynomial a, const U& b) // Unary minus (negate). template -polynomial operator - (polynomial a) +BOOST_MATH_GPU_ENABLED polynomial operator - (polynomial a) { std::transform(a.data().begin(), a.data().end(), a.data().begin(), detail::negate()); return a; } template -bool odd(polynomial const &a) +BOOST_MATH_GPU_ENABLED bool odd(polynomial const &a) { return a.size() > 0 && a[0] != static_cast(0); } template -bool even(polynomial const &a) +BOOST_MATH_GPU_ENABLED bool even(polynomial const &a) { return !odd(a); } template -polynomial pow(polynomial base, int exp) +BOOST_MATH_GPU_ENABLED polynomial pow(polynomial base, int exp) { if (exp < 0) return policies::raise_domain_error( @@ -838,7 +838,7 @@ polynomial pow(polynomial base, int exp) } template -inline std::basic_ostream& operator << (std::basic_ostream& os, const polynomial& poly) +BOOST_MATH_GPU_ENABLED inline std::basic_ostream& operator << (std::basic_ostream& os, const polynomial& poly) { os << "{ "; for(unsigned i = 0; i < poly.size(); ++i) diff --git a/include/boost/math/tools/precision.hpp b/include/boost/math/tools/precision.hpp index d1643e01d3..662657732c 100644 --- a/include/boost/math/tools/precision.hpp +++ b/include/boost/math/tools/precision.hpp @@ -10,14 +10,20 @@ #pragma once #endif +#include #include +#include +#include #include + +#ifndef BOOST_MATH_HAS_NVRTC #include #include #include #include #include #include // LDBL_MANT_DIG +#endif namespace boost{ namespace math { @@ -36,30 +42,30 @@ namespace tools // See Conceptual Requirements for Real Number Types. template -inline constexpr int digits(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T)) noexcept +BOOST_MATH_GPU_ENABLED inline constexpr int digits(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T)) noexcept { - static_assert( ::std::numeric_limits::is_specialized, "Type T must be specialized"); - static_assert( ::std::numeric_limits::radix == 2 || ::std::numeric_limits::radix == 10, "Type T must have a radix of 2 or 10"); + static_assert( ::boost::math::numeric_limits::is_specialized, "Type T must be specialized"); + static_assert( ::boost::math::numeric_limits::radix == 2 || ::boost::math::numeric_limits::radix == 10, "Type T must have a radix of 2 or 10"); - return std::numeric_limits::radix == 2 - ? 
std::numeric_limits::digits - : ((std::numeric_limits::digits + 1) * 1000L) / 301L; + return boost::math::numeric_limits::radix == 2 + ? boost::math::numeric_limits::digits + : ((boost::math::numeric_limits::digits + 1) * 1000L) / 301L; } template -inline constexpr T max_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T max_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point::value) { - static_assert( ::std::numeric_limits::is_specialized, "Type T must be specialized"); - return (std::numeric_limits::max)(); + static_assert( ::boost::math::numeric_limits::is_specialized, "Type T must be specialized"); + return (boost::math::numeric_limits::max)(); } // Also used as a finite 'infinite' value for - and +infinity, for example: // -max_value = -1.79769e+308, max_value = 1.79769e+308. template -inline constexpr T min_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T min_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point::value) { - static_assert( ::std::numeric_limits::is_specialized, "Type T must be specialized"); + static_assert( ::boost::math::numeric_limits::is_specialized, "Type T must be specialized"); - return (std::numeric_limits::min)(); + return (boost::math::numeric_limits::min)(); } namespace detail{ @@ -72,13 +78,13 @@ namespace detail{ // For type float first: // template -inline constexpr T log_max_value(const std::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED constexpr T log_max_value(const boost::math::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point::value) { return 88.0f; } template -inline constexpr T log_min_value(const std::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED constexpr T log_min_value(const boost::math::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point::value) { return -87.0f; } @@ -86,13 +92,13 @@ inline constexpr T log_min_value(const std::integral_constant& BOOST_M // Now double: // template -inline constexpr T log_max_value(const std::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED constexpr T log_max_value(const boost::math::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point::value) { return 709.0; } template -inline constexpr T log_min_value(const std::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED constexpr T log_min_value(const boost::math::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point::value) { return -708.0; } @@ -100,19 +106,19 @@ inline constexpr T log_min_value(const std::integral_constant& BOOST_ // 80 and 128-bit long doubles: // template -inline constexpr T log_max_value(const std::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T log_max_value(const boost::math::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point::value) { return 11356.0L; } template 
-inline constexpr T log_min_value(const std::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T log_min_value(const boost::math::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point::value) { return -11355.0L; } template -inline T log_max_value(const std::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) +BOOST_MATH_GPU_ENABLED inline T log_max_value(const boost::math::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) { BOOST_MATH_STD_USING #ifdef __SUNPRO_CC @@ -125,7 +131,7 @@ inline T log_max_value(const std::integral_constant& BOOST_MATH_APPEND_E } template -inline T log_min_value(const std::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) +BOOST_MATH_GPU_ENABLED inline T log_min_value(const boost::math::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) { BOOST_MATH_STD_USING #ifdef __SUNPRO_CC @@ -138,14 +144,14 @@ inline T log_min_value(const std::integral_constant& BOOST_MATH_APPEND_E } template -inline constexpr T epsilon(const std::true_type& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED constexpr T epsilon(const boost::math::true_type& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point::value) { - return std::numeric_limits::epsilon(); + return boost::math::numeric_limits::epsilon(); } #if defined(__GNUC__) && ((LDBL_MANT_DIG == 106) || (__LDBL_MANT_DIG__ == 106)) template <> -inline constexpr long double epsilon(const std::true_type& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(long double)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr long double epsilon(const boost::math::true_type& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(long double)) noexcept(boost::math::is_floating_point::value) { // numeric_limits on Darwin (and elsewhere) tells lies here: // the issue is that long double on a few platforms is @@ -164,7 +170,7 @@ inline constexpr long double epsilon(const std::true_type& BOOST_MA #endif template -inline T epsilon(const std::false_type& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) +BOOST_MATH_GPU_ENABLED inline T epsilon(const boost::math::false_type& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) { // Note: don't cache result as precision may vary at runtime: BOOST_MATH_STD_USING // for ADL of std names @@ -174,23 +180,23 @@ inline T epsilon(const std::false_type& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE template struct log_limit_traits { - typedef typename std::conditional< - (std::numeric_limits::radix == 2) && - (std::numeric_limits::max_exponent == 128 - || std::numeric_limits::max_exponent == 1024 - || std::numeric_limits::max_exponent == 16384), - std::integral_constant::max_exponent > INT_MAX ? INT_MAX : static_cast(std::numeric_limits::max_exponent))>, - std::integral_constant + typedef typename boost::math::conditional< + (boost::math::numeric_limits::radix == 2) && + (boost::math::numeric_limits::max_exponent == 128 + || boost::math::numeric_limits::max_exponent == 1024 + || boost::math::numeric_limits::max_exponent == 16384), + boost::math::integral_constant::max_exponent > (boost::math::numeric_limits::max)() ? 
(boost::math::numeric_limits::max)() : static_cast(boost::math::numeric_limits::max_exponent))>, + boost::math::integral_constant >::type tag_type; static constexpr bool value = (tag_type::value != 0); - static_assert(::std::numeric_limits::is_specialized || !value, "Type T must be specialized or equal to 0"); + static_assert(::boost::math::numeric_limits::is_specialized || !value, "Type T must be specialized or equal to 0"); }; template struct log_limit_noexcept_traits_imp : public log_limit_traits {}; -template struct log_limit_noexcept_traits_imp : public std::integral_constant {}; +template struct log_limit_noexcept_traits_imp : public boost::math::integral_constant {}; template -struct log_limit_noexcept_traits : public log_limit_noexcept_traits_imp::value> {}; +struct log_limit_noexcept_traits : public log_limit_noexcept_traits_imp::value> {}; } // namespace detail @@ -200,28 +206,36 @@ struct log_limit_noexcept_traits : public log_limit_noexcept_traits_imp -inline constexpr T log_max_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(detail::log_limit_noexcept_traits::value) +BOOST_MATH_GPU_ENABLED inline constexpr T log_max_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(detail::log_limit_noexcept_traits::value) { -#ifndef BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS - return detail::log_max_value(typename detail::log_limit_traits::tag_type()); +#ifndef BOOST_MATH_HAS_NVRTC + #ifndef BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS + return detail::log_max_value(typename detail::log_limit_traits::tag_type()); + #else + BOOST_MATH_ASSERT(::boost::math::numeric_limits::is_specialized); + BOOST_MATH_STD_USING + static const T val = log((boost::math::numeric_limits::max)()); + return val; + #endif #else - BOOST_MATH_ASSERT(::std::numeric_limits::is_specialized); - BOOST_MATH_STD_USING - static const T val = log((std::numeric_limits::max)()); - return val; + return log((boost::math::numeric_limits::max)()); #endif } template -inline constexpr T log_min_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(detail::log_limit_noexcept_traits::value) +BOOST_MATH_GPU_ENABLED inline constexpr T log_min_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(detail::log_limit_noexcept_traits::value) { -#ifndef BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS - return detail::log_min_value(typename detail::log_limit_traits::tag_type()); +#ifndef BOOST_MATH_HAS_NVRTC + #ifndef BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS + return detail::log_min_value(typename detail::log_limit_traits::tag_type()); + #else + BOOST_MATH_ASSERT(::boost::math::numeric_limits::is_specialized); + BOOST_MATH_STD_USING + static const T val = log((boost::math::numeric_limits::min)()); + return val; + #endif #else - BOOST_MATH_ASSERT(::std::numeric_limits::is_specialized); - BOOST_MATH_STD_USING - static const T val = log((std::numeric_limits::min)()); - return val; + return log((boost::math::numeric_limits::min)()); #endif } @@ -230,84 +244,89 @@ inline constexpr T log_min_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept( #endif template -inline constexpr T epsilon(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED constexpr T epsilon(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T)) noexcept(boost::math::is_floating_point::value) { -#ifndef BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS - return detail::epsilon(std::integral_constant::is_specialized>()); + // NVRTC does not like this dispatching method so we just skip to where we want to go +#ifndef BOOST_MATH_HAS_NVRTC + #ifndef 
BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS + return detail::epsilon(boost::math::integral_constant::is_specialized>()); + #else + return ::boost::math::numeric_limits::is_specialized ? + detail::epsilon(boost::math::true_type()) : + detail::epsilon(boost::math::false_type()); + #endif #else - return ::std::numeric_limits::is_specialized ? - detail::epsilon(std::true_type()) : - detail::epsilon(std::false_type()); + return boost::math::numeric_limits::epsilon(); #endif } namespace detail{ template -inline constexpr T root_epsilon_imp(const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T root_epsilon_imp(const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(0.00034526698300124390839884978618400831996329879769945L); } template -inline constexpr T root_epsilon_imp(const T*, const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T root_epsilon_imp(const T*, const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(0.1490116119384765625e-7L); } template -inline constexpr T root_epsilon_imp(const T*, const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T root_epsilon_imp(const T*, const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(0.32927225399135962333569506281281311031656150598474e-9L); } template -inline constexpr T root_epsilon_imp(const T*, const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T root_epsilon_imp(const T*, const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(0.1387778780781445675529539585113525390625e-16L); } template -inline T root_epsilon_imp(const T*, const Tag&) +BOOST_MATH_GPU_ENABLED inline T root_epsilon_imp(const T*, const Tag&) { BOOST_MATH_STD_USING - static const T r_eps = sqrt(tools::epsilon()); + BOOST_MATH_STATIC_LOCAL_VARIABLE const T r_eps = sqrt(tools::epsilon()); return r_eps; } template -inline T root_epsilon_imp(const T*, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED inline T root_epsilon_imp(const T*, const boost::math::integral_constant&) { BOOST_MATH_STD_USING return sqrt(tools::epsilon()); } template -inline constexpr T cbrt_epsilon_imp(const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T cbrt_epsilon_imp(const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(0.0049215666011518482998719164346805794944150447839903L); } template -inline constexpr T cbrt_epsilon_imp(const T*, const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T cbrt_epsilon_imp(const T*, const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(6.05545445239333906078989272793696693569753008995e-6L); } template -inline constexpr T cbrt_epsilon_imp(const T*, const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T cbrt_epsilon_imp(const T*, const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(4.76837158203125e-7L); } template -inline constexpr T cbrt_epsilon_imp(const T*, const std::integral_constant&) 
noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T cbrt_epsilon_imp(const T*, const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(5.7749313854154005630396773604745549542403508090496e-12L); } template -inline T cbrt_epsilon_imp(const T*, const Tag&) +BOOST_MATH_GPU_ENABLED inline T cbrt_epsilon_imp(const T*, const Tag&) { BOOST_MATH_STD_USING; static const T cbrt_eps = pow(tools::epsilon(), T(1) / 3); @@ -315,38 +334,38 @@ inline T cbrt_epsilon_imp(const T*, const Tag&) } template -inline T cbrt_epsilon_imp(const T*, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED inline T cbrt_epsilon_imp(const T*, const boost::math::integral_constant&) { BOOST_MATH_STD_USING; return pow(tools::epsilon(), T(1) / 3); } template -inline constexpr T forth_root_epsilon_imp(const T*, const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T forth_root_epsilon_imp(const T*, const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(0.018581361171917516667460937040007436176452688944747L); } template -inline constexpr T forth_root_epsilon_imp(const T*, const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T forth_root_epsilon_imp(const T*, const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(0.0001220703125L); } template -inline constexpr T forth_root_epsilon_imp(const T*, const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T forth_root_epsilon_imp(const T*, const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(0.18145860519450699870567321328132261891067079047605e-4L); } template -inline constexpr T forth_root_epsilon_imp(const T*, const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T forth_root_epsilon_imp(const T*, const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(0.37252902984619140625e-8L); } template -inline T forth_root_epsilon_imp(const T*, const Tag&) +BOOST_MATH_GPU_ENABLED inline T forth_root_epsilon_imp(const T*, const Tag&) { BOOST_MATH_STD_USING static const T r_eps = sqrt(sqrt(tools::epsilon())); @@ -354,7 +373,7 @@ inline T forth_root_epsilon_imp(const T*, const Tag&) } template -inline T forth_root_epsilon_imp(const T*, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED inline T forth_root_epsilon_imp(const T*, const boost::math::integral_constant&) { BOOST_MATH_STD_USING return sqrt(sqrt(tools::epsilon())); @@ -363,26 +382,26 @@ inline T forth_root_epsilon_imp(const T*, const std::integral_constant&) template struct root_epsilon_traits { - typedef std::integral_constant::radix == 2) && (::std::numeric_limits::digits != INT_MAX) ? std::numeric_limits::digits : 0> tag_type; + typedef boost::math::integral_constant::radix == 2) && (::boost::math::numeric_limits::digits != (boost::math::numeric_limits::max)()) ? 
boost::math::numeric_limits::digits : 0> tag_type; static constexpr bool has_noexcept = (tag_type::value == 113) || (tag_type::value == 64) || (tag_type::value == 53) || (tag_type::value == 24); }; } template -inline constexpr T root_epsilon() noexcept(std::is_floating_point::value && detail::root_epsilon_traits::has_noexcept) +BOOST_MATH_GPU_ENABLED inline constexpr T root_epsilon() noexcept(boost::math::is_floating_point::value && detail::root_epsilon_traits::has_noexcept) { return detail::root_epsilon_imp(static_cast(nullptr), typename detail::root_epsilon_traits::tag_type()); } template -inline constexpr T cbrt_epsilon() noexcept(std::is_floating_point::value && detail::root_epsilon_traits::has_noexcept) +BOOST_MATH_GPU_ENABLED inline constexpr T cbrt_epsilon() noexcept(boost::math::is_floating_point::value && detail::root_epsilon_traits::has_noexcept) { return detail::cbrt_epsilon_imp(static_cast(nullptr), typename detail::root_epsilon_traits::tag_type()); } template -inline constexpr T forth_root_epsilon() noexcept(std::is_floating_point::value && detail::root_epsilon_traits::has_noexcept) +BOOST_MATH_GPU_ENABLED inline constexpr T forth_root_epsilon() noexcept(boost::math::is_floating_point::value && detail::root_epsilon_traits::has_noexcept) { return detail::forth_root_epsilon_imp(static_cast(nullptr), typename detail::root_epsilon_traits::tag_type()); } diff --git a/include/boost/math/tools/promotion.hpp b/include/boost/math/tools/promotion.hpp index c117e9575d..a65f3703f4 100644 --- a/include/boost/math/tools/promotion.hpp +++ b/include/boost/math/tools/promotion.hpp @@ -3,6 +3,7 @@ // Copyright John Maddock 2006. // Copyright Paul A. Bristow 2006. // Copyright Matt Borland 2023. +// Copyright Ryan Elandt 2023. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. @@ -24,15 +25,7 @@ #endif #include -#include - -#if defined __has_include -# if __cplusplus > 202002L || (defined(_MSVC_LANG) && _MSVC_LANG > 202002L) -# if __has_include () -# include -# endif -# endif -#endif +#include namespace boost { @@ -40,272 +33,103 @@ namespace boost { namespace tools { + ///// This promotion system works as follows: + // + // Rule (one argument promotion rule): + // - Promotes `T` to `double` if `T` is an integer type as identified by + // `std::is_integral`, otherwise is `T` + // + // Rule (two or more argument promotion rule): + // - 1. Calculates type using applying Rule. + // - 2. Calculates type using applying Rule + // - If the type calculated in 1 and 2 are both floating point types, as + // identified by `std::is_floating_point`, then return the type + // determined by `std::common_type`. Otherwise return the type using + // an asymmetric convertibility rule. + // + ///// Discussion: + // // If either T1 or T2 is an integer type, // pretend it was a double (for the purposes of further analysis). // Then pick the wider of the two floating-point types // as the actual signature to forward to. // For example: - // foo(int, short) -> double foo(double, double); - // foo(int, float) -> double foo(double, double); - // Note: NOT float foo(float, float) - // foo(int, double) -> foo(double, double); - // foo(double, float) -> double foo(double, double); - // foo(double, float) -> double foo(double, double); - // foo(any-int-or-float-type, long double) -> foo(long double, long double); - // but ONLY float foo(float, float) is unchanged. - // So the only way to get an entirely float version is to call foo(1.F, 2.F), - // But since most (all?) 
the math functions convert to double internally, - // probably there would not be the hoped-for gain by using float here. - + // foo(int, short) -> double foo(double, double); // ***NOT*** float foo(float, float) + // foo(int, float) -> double foo(double, double); // ***NOT*** float foo(float, float) + // foo(int, double) -> foo(double, double); + // foo(double, float) -> double foo(double, double); + // foo(double, float) -> double foo(double, double); + // foo(any-int-or-float-type, long double) -> foo(long double, long double); + // ONLY float foo(float, float) is unchanged, so the only way to get an + // entirely float version is to call foo(1.F, 2.F). But since most (all?) the + // math functions convert to double internally, probably there would not be the + // hoped-for gain by using float here. + // // This follows the C-compatible conversion rules of pow, etc // where pow(int, float) is converted to pow(double, double). + + // Promotes a single argument to double if it is an integer type template - struct promote_arg - { // If T is integral type, then promote to double. - using type = typename std::conditional::value, double, T>::type; + struct promote_arg { + using type = typename boost::math::conditional::value, double, T>::type; }; - // These full specialisations reduce std::conditional usage and speed up - // compilation: - template <> struct promote_arg { using type = float; }; - template <> struct promote_arg{ using type = double; }; - template <> struct promote_arg { using type = long double; }; - template <> struct promote_arg { using type = double; }; - #ifdef __STDCPP_FLOAT16_T__ - template <> struct promote_arg { using type = std::float16_t; }; - #endif - #ifdef __STDCPP_FLOAT32_T__ - template <> struct promote_arg { using type = std::float32_t; }; - #endif - #ifdef __STDCPP_FLOAT64_T__ - template <> struct promote_arg { using type = std::float64_t; }; - #endif - #ifdef __STDCPP_FLOAT128_T__ - template <> struct promote_arg { using type = std::float128_t; }; - #endif - - template - using promote_arg_t = typename promote_arg::type; + // Promotes two arguments, neither of which is an integer type using an asymmetric + // convertibility rule. + template ::value && boost::math::is_floating_point::value)> + struct pa2_integral_already_removed { + using type = typename boost::math::conditional< + !boost::math::is_floating_point::value && boost::math::is_convertible::value, + T2, T1>::type; + }; + // For two floating point types, promotes using `std::common_type` functionality template - struct promote_args_2 - { // Promote, if necessary, & pick the wider of the two floating-point types. - // for both parameter types, if integral promote to double. - using T1P = typename promote_arg::type; // T1 perhaps promoted. - using T2P = typename promote_arg::type; // T2 perhaps promoted. - using intermediate_type = typename std::conditional< - std::is_floating_point::value && std::is_floating_point::value, // both T1P and T2P are floating-point? -#ifdef __STDCPP_FLOAT128_T__ - typename std::conditional::value || std::is_same::value, // either long double? - std::float128_t, -#endif -#ifdef BOOST_MATH_USE_FLOAT128 - typename std::conditional::value || std::is_same<__float128, T2P>::value, // either long double? - __float128, -#endif - typename std::conditional::value || std::is_same::value, // either long double? - long double, // then result type is long double. -#ifdef __STDCPP_FLOAT64_T__ - typename std::conditional::value || std::is_same::value, // either float64? 
- std::float64_t, // then result type is float64_t. -#endif - typename std::conditional::value || std::is_same::value, // either double? - double, // result type is double. -#ifdef __STDCPP_FLOAT32_T__ - typename std::conditional::value || std::is_same::value, // either float32? - std::float32_t, // then result type is float32_t. -#endif - float // else result type is float. - >::type -#ifdef BOOST_MATH_USE_FLOAT128 - >::type -#endif -#ifdef __STDCPP_FLOAT128_T__ - >::type -#endif -#ifdef __STDCPP_FLOAT64_T__ - >::type -#endif -#ifdef __STDCPP_FLOAT32_T__ - >::type -#endif - >::type, - // else one or the other is a user-defined type: - typename std::conditional::value && std::is_convertible::value, T2P, T1P>::type>::type; - -#ifdef __STDCPP_FLOAT64_T__ - // If long doubles are doubles then we should prefer to use std::float64_t when available - using type = std::conditional_t<(sizeof(double) == sizeof(long double) && std::is_same::value), std::float64_t, intermediate_type>; -#else - using type = intermediate_type; -#endif - }; // promote_arg2 - // These full specialisations reduce std::conditional usage and speed up - // compilation: - template <> struct promote_args_2 { using type = float; }; - template <> struct promote_args_2{ using type = double; }; - template <> struct promote_args_2 { using type = long double; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = long double; }; - template <> struct promote_args_2 { using type = long double; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = long double; }; - template <> struct promote_args_2 { using type = long double; }; - template <> struct promote_args_2 { using type = long double; }; - template <> struct promote_args_2 { using type = long double; }; - - #ifdef __STDCPP_FLOAT128_T__ - template <> struct promote_args_2 { using type = std::float128_t; }; - template <> struct promote_args_2 { using type = std::float128_t; }; - template <> struct promote_args_2 { using type = std::float128_t; }; - template <> struct promote_args_2 { using type = std::float128_t; }; - template <> struct promote_args_2 { using type = std::float128_t; }; - template <> struct promote_args_2 { using type = std::float128_t; }; - template <> struct promote_args_2 { using type = std::float128_t; }; - template <> struct promote_args_2 { using type = std::float128_t; }; - - #ifdef __STDCPP_FLOAT16_T__ - template <> struct promote_args_2 { using type = std::float128_t; }; - template <> struct promote_args_2 { using type = std::float128_t; }; - #endif - - #ifdef __STDCPP_FLOAT32_T__ - template <> struct promote_args_2 { using type = std::float128_t; }; - template <> struct promote_args_2 { using type = std::float128_t; }; - #endif - - #ifdef __STDCPP_FLOAT64_T__ - template <> struct promote_args_2 { using type = std::float128_t; }; - template <> struct promote_args_2 { using type = std::float128_t; }; - #endif - - template <> struct promote_args_2 { using type = std::float128_t; }; - #endif - - #ifdef __STDCPP_FLOAT64_T__ - template <> struct promote_args_2 { using type = std::float64_t; }; - template <> struct promote_args_2 { using 
type = std::float64_t; }; - template <> struct promote_args_2 { using type = std::float64_t; }; - template <> struct promote_args_2 { using type = std::float64_t; }; - template <> struct promote_args_2 { using type = std::float64_t; }; - template <> struct promote_args_2 { using type = std::float64_t; }; - template <> struct promote_args_2 { using type = long double; }; - template <> struct promote_args_2 { using type = long double; }; - - #ifdef __STDCPP_FLOAT16_T__ - template <> struct promote_args_2 { using type = std::float64_t; }; - template <> struct promote_args_2 { using type = std::float64_t; }; - #endif - - #ifdef __STDCPP_FLOAT32_T__ - template <> struct promote_args_2 { using type = std::float64_t; }; - template <> struct promote_args_2 { using type = std::float64_t; }; - #endif - - template <> struct promote_args_2 { using type = std::float64_t; }; - #endif - - #ifdef __STDCPP_FLOAT32_T__ - template <> struct promote_args_2 { using type = std::float32_t; }; - template <> struct promote_args_2 { using type = std::float32_t; }; - template <> struct promote_args_2 { using type = std::float32_t; }; - template <> struct promote_args_2 { using type = std::float32_t; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = long double; }; - template <> struct promote_args_2 { using type = long double; }; - - #ifdef __STDCPP_FLOAT16_T__ - template <> struct promote_args_2 { using type = std::float32_t; }; - template <> struct promote_args_2 { using type = std::float32_t; }; - #endif - - template <> struct promote_args_2 { using type = std::float32_t; }; - #endif + struct pa2_integral_already_removed { + using type = boost::math::common_type_t; + }; - #ifdef __STDCPP_FLOAT16_T__ - template <> struct promote_args_2 { using type = std::float16_t; }; - template <> struct promote_args_2 { using type = std::float16_t; }; - template <> struct promote_args_2 { using type = float; }; - template <> struct promote_args_2 { using type = float; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = long double; }; - template <> struct promote_args_2 { using type = long double; }; - template <> struct promote_args_2 { using type = std::float16_t; }; - #endif + // Template definition for promote_args_permissive + template + struct promote_args_permissive; + // Specialization for one argument + template + struct promote_args_permissive { + using type = typename promote_arg::type>::type; + }; + // Specialization for two or more arguments + template + struct promote_args_permissive { + using type = typename pa2_integral_already_removed< + typename promote_args_permissive::type, + typename promote_args_permissive::type + >::type; + }; - template - using promote_args_2_t = typename promote_args_2::type; + template + using promote_args_permissive_t = typename promote_args_permissive::type; - template - struct promote_args - { - using type = typename promote_args_2< - typename std::remove_cv::type, - typename promote_args_2< - typename std::remove_cv::type, - typename promote_args_2< - typename std::remove_cv::type, - typename promote_args_2< - typename std::remove_cv::type, - typename promote_args_2< - typename std::remove_cv::type, typename std::remove_cv::type - >::type - >::type - >::type - >::type - >::type; + // Same as `promote_args_permissive` but with a 
static assertion that the promoted type
+   // is not `long double` if `BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS` is defined
+   template <typename... Args>
+   struct promote_args {
+      using type = typename promote_args_permissive<Args...>::type;
 #if defined(BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS)
       //
       // Guard against use of long double if it's not supported:
       //
-      static_assert((0 == std::is_same<type, long double>::value), "Sorry, but this platform does not have sufficient long double support for the special functions to be reliably implemented.");
+      static_assert((0 == boost::math::is_same<type, long double>::value), "Sorry, but this platform does not have sufficient long double support for the special functions to be reliably implemented.");
 #endif
    };
 
-   template <class T1, class T2=float, class T3=float, class T4=float, class T5=float, class T6=float>
-   using promote_args_t = typename promote_args<T1, T2, T3, T4, T5, T6>::type;
-
-   //
-   // This struct is the same as above, but has no static assert on long double usage,
-   // it should be used only on functions that can be implemented for long double
-   // even when std lib support is missing or broken for that type.
-   //
-   template <class T1, class T2=float, class T3=float, class T4=float, class T5=float, class T6=float>
-   struct promote_args_permissive
-   {
-      using type = typename promote_args_2<
-         typename std::remove_cv<T1>::type,
-         typename promote_args_2<
-            typename std::remove_cv<T2>::type,
-            typename promote_args_2<
-               typename std::remove_cv<T3>::type,
-               typename promote_args_2<
-                  typename std::remove_cv<T4>::type,
-                  typename promote_args_2<
-                     typename std::remove_cv<T5>::type, typename std::remove_cv<T6>::type
-                  >::type
-               >::type
-            >::type
-         >::type
-      >::type;
-   };
-
-   template <class T1, class T2=float, class T3=float, class T4=float, class T5=float, class T6=float>
-   using promote_args_permissive_t = typename promote_args_permissive<T1, T2, T3, T4, T5, T6>::type;
+   template <typename... Args>
+   using promote_args_t = typename promote_args<Args...>::type;
 
 } // namespace tools
 } // namespace math
 } // namespace boost
 
 #endif // BOOST_MATH_PROMOTION_HPP
-
diff --git a/include/boost/math/tools/rational.hpp b/include/boost/math/tools/rational.hpp
index 69b7251539..a535abcdc5 100644
--- a/include/boost/math/tools/rational.hpp
+++ b/include/boost/math/tools/rational.hpp
@@ -10,9 +10,14 @@
 #pragma once
 #endif
 
-#include <array>
 #include <boost/math/tools/config.hpp>
 #include <boost/math/tools/assert.hpp>
+#include <boost/math/tools/type_traits.hpp>
+#include <boost/math/tools/cstdint.hpp>
+
+#ifndef BOOST_MATH_HAS_NVRTC
+#include <array>
+#endif
 
 #if BOOST_MATH_POLY_METHOD == 1
 #  define BOOST_HEADER()
@@ -168,12 +173,12 @@ namespace boost{ namespace math{ namespace tools{
 // Forward declaration to keep two phase lookup happy:
 //
 template <class T, class U>
-U evaluate_polynomial(const T* poly, U const& z, std::size_t count) BOOST_MATH_NOEXCEPT(U);
+BOOST_MATH_GPU_ENABLED U evaluate_polynomial(const T* poly, U const& z, boost::math::size_t count) BOOST_MATH_NOEXCEPT(U);
 
 namespace detail{
 
 template <class T, class V, class Tag>
-inline V evaluate_polynomial_c_imp(const T* a, const V& val, const Tag*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& val, const Tag*) BOOST_MATH_NOEXCEPT(V)
 {
    return evaluate_polynomial(a, val, Tag::value);
 }
@@ -186,7 +191,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& val, const Tag*) BOOST_M
 // the loop expanded versions above:
 //
 template <class T, class U>
-inline U evaluate_polynomial(const T* poly, U const& z, std::size_t count) BOOST_MATH_NOEXCEPT(U)
+BOOST_MATH_GPU_ENABLED inline U evaluate_polynomial(const T* poly, U const& z, boost::math::size_t count) BOOST_MATH_NOEXCEPT(U)
 {
    BOOST_MATH_ASSERT(count > 0);
    U sum = static_cast<U>(poly[count - 1]);
@@ -201,69 +206,75 @@ inline U evaluate_polynomial(const T* poly, U const& z, std::size_t count) BOOST
 //
 // Compile time sized polynomials, just inline forwarders to the
 // implementations above:
 //
-template <std::size_t N, class T, class V>
-inline V evaluate_polynomial(const T(&a)[N], const V& val) BOOST_MATH_NOEXCEPT(V)
+template <boost::math::size_t N, class T, class V>
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial(const T(&a)[N], const V& val) BOOST_MATH_NOEXCEPT(V)
 {
-   typedef std::integral_constant<int, N> tag_type;
+   typedef boost::math::integral_constant<int, N> tag_type;
    return detail::evaluate_polynomial_c_imp(static_cast<const T*>(a), val, static_cast<tag_type const*>(nullptr));
 }
 
-template <std::size_t N, class T, class V>
-inline V evaluate_polynomial(const std::array<T, N>& a, const V& val) BOOST_MATH_NOEXCEPT(V)
+#ifndef BOOST_MATH_HAS_NVRTC
+template <std::size_t N, class T, class V>
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial(const std::array<T, N>& a, const V& val) BOOST_MATH_NOEXCEPT(V)
 {
-   typedef std::integral_constant<int, N> tag_type;
+   typedef boost::math::integral_constant<int, N> tag_type;
    return detail::evaluate_polynomial_c_imp(static_cast<const T*>(a.data()), val, static_cast<tag_type const*>(nullptr));
 }
+#endif
 //
 // Even polynomials are trivial: just square the argument!
 //
 template <class T, class U>
-inline U evaluate_even_polynomial(const T* poly, U z, std::size_t count) BOOST_MATH_NOEXCEPT(U)
+BOOST_MATH_GPU_ENABLED inline U evaluate_even_polynomial(const T* poly, U z, boost::math::size_t count) BOOST_MATH_NOEXCEPT(U)
 {
    return evaluate_polynomial(poly, U(z*z), count);
 }
 
-template <std::size_t N, class T, class V>
-inline V evaluate_even_polynomial(const T(&a)[N], const V& z) BOOST_MATH_NOEXCEPT(V)
+template <boost::math::size_t N, class T, class V>
+BOOST_MATH_GPU_ENABLED inline V evaluate_even_polynomial(const T(&a)[N], const V& z) BOOST_MATH_NOEXCEPT(V)
 {
    return evaluate_polynomial(a, V(z*z));
 }
 
-template <std::size_t N, class T, class V>
-inline V evaluate_even_polynomial(const std::array<T, N>& a, const V& z) BOOST_MATH_NOEXCEPT(V)
+#ifndef BOOST_MATH_HAS_NVRTC
+template <std::size_t N, class T, class V>
+BOOST_MATH_GPU_ENABLED inline V evaluate_even_polynomial(const std::array<T, N>& a, const V& z) BOOST_MATH_NOEXCEPT(V)
 {
    return evaluate_polynomial(a, V(z*z));
 }
+#endif
 //
 // Odd polynomials come next:
 //
 template <class T, class U>
-inline U evaluate_odd_polynomial(const T* poly, U z, std::size_t count) BOOST_MATH_NOEXCEPT(U)
+BOOST_MATH_GPU_ENABLED inline U evaluate_odd_polynomial(const T* poly, U z, boost::math::size_t count) BOOST_MATH_NOEXCEPT(U)
 {
    return poly[0] + z * evaluate_polynomial(poly+1, U(z*z), count-1);
 }
 
-template <std::size_t N, class T, class V>
-inline V evaluate_odd_polynomial(const T(&a)[N], const V& z) BOOST_MATH_NOEXCEPT(V)
+template <boost::math::size_t N, class T, class V>
+BOOST_MATH_GPU_ENABLED inline V evaluate_odd_polynomial(const T(&a)[N], const V& z) BOOST_MATH_NOEXCEPT(V)
 {
-   typedef std::integral_constant<int, N - 1> tag_type;
+   typedef boost::math::integral_constant<int, N - 1> tag_type;
    return a[0] + z * detail::evaluate_polynomial_c_imp(static_cast<const T*>(a) + 1, V(z*z), static_cast<tag_type const*>(nullptr));
 }
 
-template <std::size_t N, class T, class V>
-inline V evaluate_odd_polynomial(const std::array<T, N>& a, const V& z) BOOST_MATH_NOEXCEPT(V)
+#ifndef BOOST_MATH_HAS_NVRTC
+template <std::size_t N, class T, class V>
+BOOST_MATH_GPU_ENABLED inline V evaluate_odd_polynomial(const std::array<T, N>& a, const V& z) BOOST_MATH_NOEXCEPT(V)
 {
-   typedef std::integral_constant<int, N - 1> tag_type;
+   typedef boost::math::integral_constant<int, N - 1> tag_type;
    return a[0] + z * detail::evaluate_polynomial_c_imp(static_cast<const T*>(a.data()) + 1, V(z*z), static_cast<tag_type const*>(nullptr));
 }
+#endif
 
 template <class T, class U, class V>
-V evaluate_rational(const T* num, const U* denom, const V& z_, std::size_t count) BOOST_MATH_NOEXCEPT(V);
+BOOST_MATH_GPU_ENABLED V evaluate_rational(const T* num, const U* denom, const V& z_, boost::math::size_t count) BOOST_MATH_NOEXCEPT(V);
 
 namespace detail{
 
 template <class T, class U, class V, class Tag>
-inline V evaluate_rational_c_imp(const T* num, const U* denom, const V& z, const Tag*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* num, const U* denom, const V& z, const Tag*) BOOST_MATH_NOEXCEPT(V)
 {
    return boost::math::tools::evaluate_rational(num, denom, z, Tag::value);
 }
@@ -278,7 +289,7 @@ inline V evaluate_rational_c_imp(const T* num, const U* denom, const V& z, const
 // in our Lanczos code for example.
 //
 template <class T, class U, class V>
-V evaluate_rational(const T* num, const U* denom, const V& z_, std::size_t count) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED V evaluate_rational(const T* num, const U* denom, const V& z_, boost::math::size_t count) BOOST_MATH_NOEXCEPT(V)
 {
    V z(z_);
    V s1, s2;
@@ -310,17 +321,19 @@ V evaluate_rational(const T* num, const U* denom, const V& z_, std::size_t count
    return s1 / s2;
 }
 
-template <std::size_t N, class T, class U, class V>
-inline V evaluate_rational(const T(&a)[N], const U(&b)[N], const V& z) BOOST_MATH_NOEXCEPT(V)
+template <boost::math::size_t N, class T, class U, class V>
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational(const T(&a)[N], const U(&b)[N], const V& z) BOOST_MATH_NOEXCEPT(V)
 {
-   return detail::evaluate_rational_c_imp(a, b, z, static_cast<const std::integral_constant<int, N>*>(nullptr));
+   return detail::evaluate_rational_c_imp(a, b, z, static_cast<const boost::math::integral_constant<int, N>*>(nullptr));
 }
 
-template <std::size_t N, class T, class U, class V>
-inline V evaluate_rational(const std::array<T, N>& a, const std::array<U, N>& b, const V& z) BOOST_MATH_NOEXCEPT(V)
+#ifndef BOOST_MATH_HAS_NVRTC
+template <std::size_t N, class T, class U, class V>
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational(const std::array<T, N>& a, const std::array<U, N>& b, const V& z) BOOST_MATH_NOEXCEPT(V)
 {
-   return detail::evaluate_rational_c_imp(a.data(), b.data(), z, static_cast<const std::integral_constant<int, N>*>(nullptr));
+   return detail::evaluate_rational_c_imp(a.data(), b.data(), z, static_cast<const boost::math::integral_constant<int, N>*>(nullptr));
 }
+#endif
 
 } // namespace tools
 } // namespace math
diff --git a/include/boost/math/tools/roots.hpp b/include/boost/math/tools/roots.hpp
index 97e67fae95..b0b0fc246c 100644
--- a/include/boost/math/tools/roots.hpp
+++ b/include/boost/math/tools/roots.hpp
@@ -1,4 +1,5 @@
 // (C) Copyright John Maddock 2006.
+// (C) Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -9,20 +10,21 @@ #ifdef _MSC_VER #pragma once #endif -#include // test for multiprecision types in complex Newton - -#include -#include -#include -#include #include -#include - +#include // test for multiprecision types in complex Newton +#include +#include +#include +#include #include +#include +#include + +#ifndef BOOST_MATH_HAS_GPU_SUPPORT #include #include -#include +#endif namespace boost { namespace math { @@ -33,11 +35,11 @@ namespace detail { namespace dummy { template - typename T::value_type get(const T&) BOOST_MATH_NOEXCEPT(T); + BOOST_MATH_GPU_ENABLED typename T::value_type get(const T&) BOOST_MATH_NOEXCEPT(T); } template -void unpack_tuple(const Tuple& t, T& a, T& b) BOOST_MATH_NOEXCEPT(T) +BOOST_MATH_GPU_ENABLED void unpack_tuple(const Tuple& t, T& a, T& b) BOOST_MATH_NOEXCEPT(T) { using dummy::get; // Use ADL to find the right overload for get: @@ -45,7 +47,7 @@ void unpack_tuple(const Tuple& t, T& a, T& b) BOOST_MATH_NOEXCEPT(T) b = get<1>(t); } template -void unpack_tuple(const Tuple& t, T& a, T& b, T& c) BOOST_MATH_NOEXCEPT(T) +BOOST_MATH_GPU_ENABLED void unpack_tuple(const Tuple& t, T& a, T& b, T& c) BOOST_MATH_NOEXCEPT(T) { using dummy::get; // Use ADL to find the right overload for get: @@ -55,7 +57,7 @@ void unpack_tuple(const Tuple& t, T& a, T& b, T& c) BOOST_MATH_NOEXCEPT(T) } template -inline void unpack_0(const Tuple& t, T& val) BOOST_MATH_NOEXCEPT(T) +BOOST_MATH_GPU_ENABLED inline void unpack_0(const Tuple& t, T& val) BOOST_MATH_NOEXCEPT(T) { using dummy::get; // Rely on ADL to find the correct overload of get: @@ -63,26 +65,30 @@ inline void unpack_0(const Tuple& t, T& val) BOOST_MATH_NOEXCEPT(T) } template -inline void unpack_tuple(const std::pair& p, V& a, V& b) BOOST_MATH_NOEXCEPT(T) +BOOST_MATH_GPU_ENABLED inline void unpack_tuple(const boost::math::pair& p, V& a, V& b) BOOST_MATH_NOEXCEPT(T) { a = p.first; b = p.second; } template -inline void unpack_0(const std::pair& p, V& a) BOOST_MATH_NOEXCEPT(T) +BOOST_MATH_GPU_ENABLED inline void unpack_0(const boost::math::pair& p, V& a) BOOST_MATH_NOEXCEPT(T) { a = p.first; } template -void handle_zero_derivative(F f, +BOOST_MATH_GPU_ENABLED void handle_zero_derivative(F f, T& last_f0, const T& f0, T& delta, T& result, T& guess, const T& min, - const T& max) noexcept(BOOST_MATH_IS_FLOAT(T) && noexcept(std::declval()(std::declval()))) + const T& max) noexcept(BOOST_MATH_IS_FLOAT(T) + #ifndef BOOST_MATH_HAS_GPU_SUPPORT + && noexcept(std::declval()(std::declval())) + #endif + ) { if (last_f0 == 0) { @@ -128,25 +134,29 @@ void handle_zero_derivative(F f, } // namespace template -std::pair bisect(F f, T min, T max, Tol tol, std::uintmax_t& max_iter, const Policy& pol) noexcept(policies::is_noexcept_error_policy::value&& BOOST_MATH_IS_FLOAT(T) && noexcept(std::declval()(std::declval()))) +BOOST_MATH_GPU_ENABLED boost::math::pair bisect(F f, T min, T max, Tol tol, boost::math::uintmax_t& max_iter, const Policy& pol) noexcept(policies::is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()(std::declval())) +#endif +) { T fmin = f(min); T fmax = f(max); if (fmin == 0) { max_iter = 2; - return std::make_pair(min, min); + return boost::math::make_pair(min, min); } if (fmax == 0) { max_iter = 2; - return std::make_pair(max, max); + return boost::math::make_pair(max, max); } // // Error checking: // - static const char* function = "boost::math::tools::bisect<%1%>"; + 
constexpr auto function = "boost::math::tools::bisect<%1%>"; if (min >= max) { return boost::math::detail::pair_from_single(policies::raise_evaluation_error(function, @@ -196,29 +206,41 @@ std::pair bisect(F f, T min, T max, Tol tol, std::uintmax_t& max_iter, con std::cout << "Bisection required " << max_iter << " iterations.\n"; #endif - return std::make_pair(min, max); + return boost::math::make_pair(min, max); } template -inline std::pair bisect(F f, T min, T max, Tol tol, std::uintmax_t& max_iter) noexcept(policies::is_noexcept_error_policy >::value&& BOOST_MATH_IS_FLOAT(T) && noexcept(std::declval()(std::declval()))) +BOOST_MATH_GPU_ENABLED inline boost::math::pair bisect(F f, T min, T max, Tol tol, boost::math::uintmax_t& max_iter) noexcept(policies::is_noexcept_error_policy >::value && BOOST_MATH_IS_FLOAT(T) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()(std::declval())) +#endif +) { return bisect(f, min, max, tol, max_iter, policies::policy<>()); } template -inline std::pair bisect(F f, T min, T max, Tol tol) noexcept(policies::is_noexcept_error_policy >::value&& BOOST_MATH_IS_FLOAT(T) && noexcept(std::declval()(std::declval()))) +BOOST_MATH_GPU_ENABLED inline boost::math::pair bisect(F f, T min, T max, Tol tol) noexcept(policies::is_noexcept_error_policy >::value && BOOST_MATH_IS_FLOAT(T) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()(std::declval())) +#endif +) { - std::uintmax_t m = (std::numeric_limits::max)(); + boost::math::uintmax_t m = (boost::math::numeric_limits::max)(); return bisect(f, min, max, tol, m, policies::policy<>()); } template -T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& max_iter) noexcept(policies::is_noexcept_error_policy >::value&& BOOST_MATH_IS_FLOAT(T) && noexcept(std::declval()(std::declval()))) +BOOST_MATH_GPU_ENABLED T newton_raphson_iterate(F f, T guess, T min, T max, int digits, boost::math::uintmax_t& max_iter) noexcept(policies::is_noexcept_error_policy >::value && BOOST_MATH_IS_FLOAT(T) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()(std::declval())) +#endif +) { BOOST_MATH_STD_USING - static const char* function = "boost::math::tools::newton_raphson_iterate<%1%>"; + constexpr auto function = "boost::math::tools::newton_raphson_iterate<%1%>"; if (min > max) { return policies::raise_evaluation_error(function, "Range arguments in wrong order in boost::math::tools::newton_raphson_iterate(first arg=%1%)", min, boost::math::policies::policy<>()); @@ -245,7 +267,7 @@ T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& T max_range_f = 0; T min_range_f = 0; - std::uintmax_t count(max_iter); + boost::math::uintmax_t count(max_iter); #ifdef BOOST_MATH_INSTRUMENT std::cout << "Newton_raphson_iterate, guess = " << guess << ", min = " << min << ", max = " << max @@ -332,12 +354,22 @@ T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& } template -inline T newton_raphson_iterate(F f, T guess, T min, T max, int digits) noexcept(policies::is_noexcept_error_policy >::value&& BOOST_MATH_IS_FLOAT(T) && noexcept(std::declval()(std::declval()))) +BOOST_MATH_GPU_ENABLED inline T newton_raphson_iterate(F f, T guess, T min, T max, int digits) noexcept(policies::is_noexcept_error_policy >::value && BOOST_MATH_IS_FLOAT(T) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()(std::declval())) +#endif +) { - std::uintmax_t m = (std::numeric_limits::max)(); + boost::math::uintmax_t m = (boost::math::numeric_limits::max)(); 
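+ // This overload simply delegates to the iteration-counting version with an
+ // effectively unlimited budget. A minimal illustrative use of the now
+ // GPU-enabled signature (cbrt_functor_deriv and the literals below are
+ // made-up examples, not part of this patch): the functor returns f(x) and
+ // f'(x) as a boost::math::pair, which unpack_tuple above knows how to read.
+ //
+ //   struct cbrt_functor_deriv
+ //   {
+ //      BOOST_MATH_GPU_ENABLED explicit cbrt_functor_deriv(double target) : a(target) {}
+ //      BOOST_MATH_GPU_ENABLED boost::math::pair<double, double> operator()(double x) const
+ //      {
+ //         return boost::math::make_pair(x * x * x - a, 3 * x * x); // f(x), f'(x)
+ //      }
+ //      double a;
+ //   };
+ //
+ //   // double r = boost::math::tools::newton_raphson_iterate(
+ //   //    cbrt_functor_deriv(27.0), 2.0, 0.0, 27.0, 40); // converges to r ~= 3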
return newton_raphson_iterate(f, guess, min, max, digits, m); } +// TODO(mborland): Disabled for now +// Recursion needs to be removed, but there is no demand at this time +#ifdef BOOST_MATH_HAS_NVRTC +}}} // Namespaces +#else + namespace detail { struct halley_step @@ -1025,4 +1057,6 @@ inline std::pair::type, typename tools: } // namespace math } // namespace boost +#endif // BOOST_MATH_HAS_NVRTC + #endif // BOOST_MATH_TOOLS_NEWTON_SOLVER_HPP diff --git a/include/boost/math/tools/series.hpp b/include/boost/math/tools/series.hpp index a4822fea43..4617ea3df7 100644 --- a/include/boost/math/tools/series.hpp +++ b/include/boost/math/tools/series.hpp @@ -10,10 +10,11 @@ #pragma once #endif -#include -#include -#include + #include +#include +#include +#include namespace boost{ namespace math{ namespace tools{ @@ -21,13 +22,17 @@ namespace boost{ namespace math{ namespace tools{ // Simple series summation come first: // template -inline typename Functor::result_type sum_series(Functor& func, const U& factor, std::uintmax_t& max_terms, const V& init_value) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +BOOST_MATH_GPU_ENABLED inline typename Functor::result_type sum_series(Functor& func, const U& factor, boost::math::uintmax_t& max_terms, const V& init_value) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()()) +#endif +) { BOOST_MATH_STD_USING typedef typename Functor::result_type result_type; - std::uintmax_t counter = max_terms; + boost::math::uintmax_t counter = max_terms; result_type result = init_value; result_type next_term; @@ -44,14 +49,22 @@ inline typename Functor::result_type sum_series(Functor& func, const U& factor, } template -inline typename Functor::result_type sum_series(Functor& func, const U& factor, std::uintmax_t& max_terms) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +BOOST_MATH_GPU_ENABLED inline typename Functor::result_type sum_series(Functor& func, const U& factor, boost::math::uintmax_t& max_terms) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()()) +#endif +) { typename Functor::result_type init_value = 0; return sum_series(func, factor, max_terms, init_value); } template -inline typename Functor::result_type sum_series(Functor& func, int bits, std::uintmax_t& max_terms, const U& init_value) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +BOOST_MATH_GPU_ENABLED inline typename Functor::result_type sum_series(Functor& func, int bits, boost::math::uintmax_t& max_terms, const U& init_value) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()()) +#endif +) { BOOST_MATH_STD_USING typedef typename Functor::result_type result_type; @@ -60,17 +73,25 @@ inline typename Functor::result_type sum_series(Functor& func, int bits, std::ui } template -inline typename Functor::result_type sum_series(Functor& func, int bits) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +BOOST_MATH_GPU_ENABLED inline typename Functor::result_type sum_series(Functor& func, int bits) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()()) +#endif +) { BOOST_MATH_STD_USING typedef typename Functor::result_type result_type; - std::uintmax_t 
iters = (std::numeric_limits::max)(); + boost::math::uintmax_t iters = (boost::math::numeric_limits::max)(); result_type init_val = 0; return sum_series(func, bits, iters, init_val); } template -inline typename Functor::result_type sum_series(Functor& func, int bits, std::uintmax_t& max_terms) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +BOOST_MATH_GPU_ENABLED inline typename Functor::result_type sum_series(Functor& func, int bits, boost::math::uintmax_t& max_terms) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()()) +#endif +) { BOOST_MATH_STD_USING typedef typename Functor::result_type result_type; @@ -79,23 +100,31 @@ inline typename Functor::result_type sum_series(Functor& func, int bits, std::ui } template -inline typename Functor::result_type sum_series(Functor& func, int bits, const U& init_value) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +BOOST_MATH_GPU_ENABLED inline typename Functor::result_type sum_series(Functor& func, int bits, const U& init_value) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()()) +#endif +) { BOOST_MATH_STD_USING - std::uintmax_t iters = (std::numeric_limits::max)(); + boost::math::uintmax_t iters = (boost::math::numeric_limits::max)(); return sum_series(func, bits, iters, init_value); } // // Checked summation: // template -inline typename Functor::result_type checked_sum_series(Functor& func, const U& factor, std::uintmax_t& max_terms, const V& init_value, V& norm) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +BOOST_MATH_GPU_ENABLED inline typename Functor::result_type checked_sum_series(Functor& func, const U& factor, boost::math::uintmax_t& max_terms, const V& init_value, V& norm) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()()) +#endif +) { BOOST_MATH_STD_USING typedef typename Functor::result_type result_type; - std::uintmax_t counter = max_terms; + boost::math::uintmax_t counter = max_terms; result_type result = init_value; result_type next_term; @@ -125,7 +154,11 @@ inline typename Functor::result_type checked_sum_series(Functor& func, const U& // in any case the result is still much better than a naive summation. 
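+ // An illustrative sketch of the compensated (Kahan) update the routine below
+ // performs; the variable names here are explanatory only and need not match
+ // the implementation:
+ //
+ //   result_type carry = 0;                // accumulated rounding error
+ //   while (/* tolerance not yet met */)
+ //   {
+ //      result_type y = func() - carry;    // correct the next term
+ //      result_type t = result + y;        // low-order bits of y may be lost here
+ //      carry = (t - result) - y;          // recover exactly what was lost
+ //      result = t;
+ //   }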
// template -inline typename Functor::result_type kahan_sum_series(Functor& func, int bits) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +BOOST_MATH_GPU_ENABLED inline typename Functor::result_type kahan_sum_series(Functor& func, int bits) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()()) +#endif +) { BOOST_MATH_STD_USING @@ -148,13 +181,17 @@ inline typename Functor::result_type kahan_sum_series(Functor& func, int bits) n } template -inline typename Functor::result_type kahan_sum_series(Functor& func, int bits, std::uintmax_t& max_terms) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +BOOST_MATH_GPU_ENABLED inline typename Functor::result_type kahan_sum_series(Functor& func, int bits, boost::math::uintmax_t& max_terms) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()()) +#endif +) { BOOST_MATH_STD_USING typedef typename Functor::result_type result_type; - std::uintmax_t counter = max_terms; + boost::math::uintmax_t counter = max_terms; result_type factor = ldexp(result_type(1), bits); result_type result = func(); diff --git a/include/boost/math/tools/toms748_solve.hpp b/include/boost/math/tools/toms748_solve.hpp index ea93713224..dee2346853 100644 --- a/include/boost/math/tools/toms748_solve.hpp +++ b/include/boost/math/tools/toms748_solve.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,13 +11,13 @@ #pragma once #endif +#include #include +#include +#include +#include #include -#include #include -#include -#include -#include #ifdef BOOST_MATH_LOG_ROOT_ITERATIONS # define BOOST_MATH_LOGGER_INCLUDE @@ -32,29 +33,36 @@ template class eps_tolerance { public: - eps_tolerance() : eps(4 * tools::epsilon()) + BOOST_MATH_GPU_ENABLED eps_tolerance() : eps(4 * tools::epsilon()) { } - eps_tolerance(unsigned bits) + BOOST_MATH_GPU_ENABLED eps_tolerance(unsigned bits) { BOOST_MATH_STD_USING - eps = (std::max)(T(ldexp(1.0F, 1-bits)), T(4 * tools::epsilon())); + eps = BOOST_MATH_GPU_SAFE_MAX(T(ldexp(1.0F, 1-bits)), T(4 * tools::epsilon())); } - bool operator()(const T& a, const T& b) + BOOST_MATH_GPU_ENABLED bool operator()(const T& a, const T& b) { BOOST_MATH_STD_USING - return fabs(a - b) <= (eps * (std::min)(fabs(a), fabs(b))); + return fabs(a - b) <= (eps * BOOST_MATH_GPU_SAFE_MIN(fabs(a), fabs(b))); } private: T eps; }; +// CUDA warns about __host__ __device__ marker on defaulted constructor +// but the warning is benign +#ifdef BOOST_MATH_ENABLE_CUDA +# pragma nv_diag_suppress 20012 +#endif + struct equal_floor { - equal_floor()= default; + BOOST_MATH_GPU_ENABLED equal_floor() = default; + template - bool operator()(const T& a, const T& b) + BOOST_MATH_GPU_ENABLED bool operator()(const T& a, const T& b) { BOOST_MATH_STD_USING return (floor(a) == floor(b)) || (fabs((b-a)/b) < boost::math::tools::epsilon() * 2); @@ -63,9 +71,10 @@ struct equal_floor struct equal_ceil { - equal_ceil()= default; + BOOST_MATH_GPU_ENABLED equal_ceil() = default; + template - bool operator()(const T& a, const T& b) + BOOST_MATH_GPU_ENABLED bool operator()(const T& a, const T& b) { BOOST_MATH_STD_USING return (ceil(a) == ceil(b)) || (fabs((b - a) / b) 
< boost::math::tools::epsilon() * 2); @@ -74,19 +83,24 @@ struct equal_ceil struct equal_nearest_integer { - equal_nearest_integer()= default; + BOOST_MATH_GPU_ENABLED equal_nearest_integer() = default; + template - bool operator()(const T& a, const T& b) + BOOST_MATH_GPU_ENABLED bool operator()(const T& a, const T& b) { BOOST_MATH_STD_USING return (floor(a + 0.5f) == floor(b + 0.5f)) || (fabs((b - a) / b) < boost::math::tools::epsilon() * 2); } }; +#ifdef BOOST_MATH_ENABLE_CUDA +# pragma nv_diag_default 20012 +#endif + namespace detail{ template -void bracket(F f, T& a, T& b, T c, T& fa, T& fb, T& d, T& fd) +BOOST_MATH_GPU_ENABLED void bracket(F f, T& a, T& b, T c, T& fa, T& fb, T& d, T& fd) { // // Given a point c inside the existing enclosing interval @@ -150,7 +164,7 @@ void bracket(F f, T& a, T& b, T c, T& fa, T& fb, T& d, T& fd) } template -inline T safe_div(T num, T denom, T r) +BOOST_MATH_GPU_ENABLED inline T safe_div(T num, T denom, T r) { // // return num / denom without overflow, @@ -167,7 +181,7 @@ inline T safe_div(T num, T denom, T r) } template -inline T secant_interpolate(const T& a, const T& b, const T& fa, const T& fb) +BOOST_MATH_GPU_ENABLED inline T secant_interpolate(const T& a, const T& b, const T& fa, const T& fb) { // // Performs standard secant interpolation of [a,b] given @@ -188,9 +202,9 @@ inline T secant_interpolate(const T& a, const T& b, const T& fa, const T& fb) } template -T quadratic_interpolate(const T& a, const T& b, T const& d, - const T& fa, const T& fb, T const& fd, - unsigned count) +BOOST_MATH_GPU_ENABLED T quadratic_interpolate(const T& a, const T& b, T const& d, + const T& fa, const T& fb, T const& fd, + unsigned count) { // // Performs quadratic interpolation to determine the next point, @@ -244,9 +258,9 @@ T quadratic_interpolate(const T& a, const T& b, T const& d, } template -T cubic_interpolate(const T& a, const T& b, const T& d, - const T& e, const T& fa, const T& fb, - const T& fd, const T& fe) +BOOST_MATH_GPU_ENABLED T cubic_interpolate(const T& a, const T& b, const T& d, + const T& e, const T& fa, const T& fb, + const T& fd, const T& fe) { // // Uses inverse cubic interpolation of f(x) at points @@ -293,7 +307,7 @@ T cubic_interpolate(const T& a, const T& b, const T& d, } // namespace detail template -std::pair toms748_solve(F f, const T& ax, const T& bx, const T& fax, const T& fbx, Tol tol, std::uintmax_t& max_iter, const Policy& pol) +BOOST_MATH_GPU_ENABLED boost::math::pair toms748_solve(F f, const T& ax, const T& bx, const T& fax, const T& fbx, Tol tol, boost::math::uintmax_t& max_iter, const Policy& pol) { // // Main entry point and logic for Toms Algorithm 748 @@ -301,15 +315,15 @@ std::pair toms748_solve(F f, const T& ax, const T& bx, const T& fax, const // BOOST_MATH_STD_USING // For ADL of std math functions - static const char* function = "boost::math::tools::toms748_solve<%1%>"; + constexpr auto function = "boost::math::tools::toms748_solve<%1%>"; // // Sanity check - are we allowed to iterate at all? 
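 // A minimal illustrative call (not part of this patch; the functor, bracket,
 // and tolerance below are made up -- f must change sign over [ax, bx]):
 //
 //   struct f_sqrt2
 //   {
 //      BOOST_MATH_GPU_ENABLED double operator()(double x) const { return x * x - 2; }
 //   };
 //
 //   boost::math::uintmax_t iters = 50;
 //   boost::math::tools::eps_tolerance<double> tol(52);
 //   boost::math::pair<double, double> r = boost::math::tools::toms748_solve(
 //      f_sqrt2(), 1.0, 2.0, tol, iters, boost::math::policies::policy<>());
 //   // On return, sqrt(2) lies in [r.first, r.second] and iters holds the count used.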
// if (max_iter == 0) - return std::make_pair(ax, bx); + return boost::math::make_pair(ax, bx); - std::uintmax_t count = max_iter; + boost::math::uintmax_t count = max_iter; T a, b, fa, fb, c, u, fu, a0, b0, d, fd, e, fe; static const T mu = 0.5f; @@ -330,7 +344,7 @@ std::pair toms748_solve(F f, const T& ax, const T& bx, const T& fax, const b = a; else if(fb == 0) a = b; - return std::make_pair(a, b); + return boost::math::make_pair(a, b); } if(boost::math::sign(fa) * boost::math::sign(fb) > 0) @@ -472,37 +486,37 @@ std::pair toms748_solve(F f, const T& ax, const T& bx, const T& fax, const a = b; } BOOST_MATH_LOG_COUNT(max_iter) - return std::make_pair(a, b); + return boost::math::make_pair(a, b); } template -inline std::pair toms748_solve(F f, const T& ax, const T& bx, const T& fax, const T& fbx, Tol tol, std::uintmax_t& max_iter) +BOOST_MATH_GPU_ENABLED inline boost::math::pair toms748_solve(F f, const T& ax, const T& bx, const T& fax, const T& fbx, Tol tol, boost::math::uintmax_t& max_iter) { return toms748_solve(f, ax, bx, fax, fbx, tol, max_iter, policies::policy<>()); } template -inline std::pair toms748_solve(F f, const T& ax, const T& bx, Tol tol, std::uintmax_t& max_iter, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline boost::math::pair toms748_solve(F f, const T& ax, const T& bx, Tol tol, boost::math::uintmax_t& max_iter, const Policy& pol) { if (max_iter <= 2) - return std::make_pair(ax, bx); + return boost::math::make_pair(ax, bx); max_iter -= 2; - std::pair r = toms748_solve(f, ax, bx, f(ax), f(bx), tol, max_iter, pol); + boost::math::pair r = toms748_solve(f, ax, bx, f(ax), f(bx), tol, max_iter, pol); max_iter += 2; return r; } template -inline std::pair toms748_solve(F f, const T& ax, const T& bx, Tol tol, std::uintmax_t& max_iter) +BOOST_MATH_GPU_ENABLED inline boost::math::pair toms748_solve(F f, const T& ax, const T& bx, Tol tol, boost::math::uintmax_t& max_iter) { return toms748_solve(f, ax, bx, tol, max_iter, policies::policy<>()); } template -std::pair bracket_and_solve_root(F f, const T& guess, T factor, bool rising, Tol tol, std::uintmax_t& max_iter, const Policy& pol) +BOOST_MATH_GPU_ENABLED boost::math::pair bracket_and_solve_root(F f, const T& guess, T factor, bool rising, Tol tol, boost::math::uintmax_t& max_iter, const Policy& pol) { BOOST_MATH_STD_USING - static const char* function = "boost::math::tools::bracket_and_solve_root<%1%>"; + constexpr auto function = "boost::math::tools::bracket_and_solve_root<%1%>"; // // Set up initial brackets: // @@ -513,7 +527,7 @@ std::pair bracket_and_solve_root(F f, const T& guess, T factor, bool risin // // Set up invocation count: // - std::uintmax_t count = max_iter - 1; + boost::math::uintmax_t count = max_iter - 1; int step = 32; @@ -563,7 +577,7 @@ std::pair bracket_and_solve_root(F f, const T& guess, T factor, bool risin // Escape route just in case the answer is zero! max_iter -= count; max_iter += 1; - return a > 0 ? std::make_pair(T(0), T(a)) : std::make_pair(T(a), T(0)); + return a > 0 ? boost::math::make_pair(T(0), T(a)) : boost::math::make_pair(T(a), T(0)); } if(count == 0) return boost::math::detail::pair_from_single(policies::raise_evaluation_error(function, "Unable to bracket root, last nearest value was %1%", a, pol)); @@ -592,7 +606,7 @@ std::pair bracket_and_solve_root(F f, const T& guess, T factor, bool risin } max_iter -= count; max_iter += 1; - std::pair r = toms748_solve( + boost::math::pair r = toms748_solve( f, (a < 0 ? b : a), (a < 0 ? 
a : b), @@ -608,7 +622,7 @@ std::pair bracket_and_solve_root(F f, const T& guess, T factor, bool risin } template -inline std::pair bracket_and_solve_root(F f, const T& guess, const T& factor, bool rising, Tol tol, std::uintmax_t& max_iter) +BOOST_MATH_GPU_ENABLED inline boost::math::pair bracket_and_solve_root(F f, const T& guess, const T& factor, bool rising, Tol tol, boost::math::uintmax_t& max_iter) { return bracket_and_solve_root(f, guess, factor, rising, tol, max_iter, policies::policy<>()); } diff --git a/include/boost/math/tools/tuple.hpp b/include/boost/math/tools/tuple.hpp index b5e42fc59e..dcc763e37a 100644 --- a/include/boost/math/tools/tuple.hpp +++ b/include/boost/math/tools/tuple.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2010. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -6,12 +7,65 @@ #ifndef BOOST_MATH_TUPLE_HPP_INCLUDED #define BOOST_MATH_TUPLE_HPP_INCLUDED -#include +#include + +#ifdef BOOST_MATH_ENABLE_CUDA + +#include +#include +#include + +namespace boost { +namespace math { + +using cuda::std::pair; +using cuda::std::tuple; + +using cuda::std::make_pair; + +using cuda::std::tie; +using cuda::std::get; + +using cuda::std::tuple_size; +using cuda::std::tuple_element; + +namespace detail { + +template +BOOST_MATH_GPU_ENABLED T&& forward(boost::math::remove_reference_t& arg) noexcept +{ + return static_cast(arg); +} + +template +BOOST_MATH_GPU_ENABLED T&& forward(boost::math::remove_reference_t&& arg) noexcept +{ + static_assert(!boost::math::is_lvalue_reference::value, "Cannot forward an rvalue as an lvalue."); + return static_cast(arg); +} + +} // namespace detail + +template +BOOST_MATH_GPU_ENABLED auto make_tuple(T&& t, Ts&&... ts) +{ + return cuda::std::tuple, boost::math::decay_t...>( + boost::math::detail::forward(t), boost::math::detail::forward(ts)... + ); +} + +} // namespace math +} // namespace boost + +#else + #include -namespace boost{ namespace math{ +namespace boost { +namespace math { using ::std::tuple; +using ::std::pair; // [6.1.3.2] Tuple creation functions using ::std::ignore; @@ -23,5 +77,12 @@ using ::std::get; using ::std::tuple_size; using ::std::tuple_element; -}} +// Pair helpers +using ::std::make_pair; + +} // namespace math +} // namespace boost + +#endif // BOOST_MATH_ENABLE_CUDA + #endif diff --git a/include/boost/math/tools/type_traits.hpp b/include/boost/math/tools/type_traits.hpp new file mode 100644 index 0000000000..a13332797b --- /dev/null +++ b/include/boost/math/tools/type_traits.hpp @@ -0,0 +1,494 @@ +// Copyright (c) 2024 Matt Borland +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Regular use of <type_traits> is not compatible with CUDA +// Adds aliases to unify the support +// Also adds convenience overloads like is_same_v so we don't have to wait for C++17 + +#ifndef BOOST_MATH_TOOLS_TYPE_TRAITS +#define BOOST_MATH_TOOLS_TYPE_TRAITS + +#include + +#ifdef BOOST_MATH_ENABLE_CUDA + +#include + +namespace boost { +namespace math { + +// Helper classes +using cuda::std::integral_constant; +using cuda::std::true_type; +using cuda::std::false_type; + +// Primary type categories +using cuda::std::is_void; +using cuda::std::is_null_pointer; +using cuda::std::is_integral; +using cuda::std::is_floating_point; +using cuda::std::is_array; +using cuda::std::is_enum; +using cuda::std::is_union; +using cuda::std::is_class; +using cuda::std::is_function; +using cuda::std::is_pointer; +using cuda::std::is_lvalue_reference; +using cuda::std::is_rvalue_reference; +using cuda::std::is_member_object_pointer; +using cuda::std::is_member_function_pointer; + +// Composite Type Categories +using cuda::std::is_fundamental; +using cuda::std::is_arithmetic; +using cuda::std::is_scalar; +using cuda::std::is_object; +using cuda::std::is_compound; +using cuda::std::is_reference; +using cuda::std::is_member_pointer; + +// Type properties +using cuda::std::is_const; +using cuda::std::is_volatile; +using cuda::std::is_trivial; +using cuda::std::is_trivially_copyable; +using cuda::std::is_standard_layout; +using cuda::std::is_empty; +using cuda::std::is_polymorphic; +using cuda::std::is_abstract; +using cuda::std::is_final; +using cuda::std::is_signed; +using cuda::std::is_unsigned; + +// Supported Operations +using cuda::std::is_constructible; +using cuda::std::is_trivially_constructible; +using cuda::std::is_nothrow_constructible; + +using cuda::std::is_default_constructible; +using cuda::std::is_trivially_default_constructible; +using cuda::std::is_nothrow_default_constructible; + +using cuda::std::is_copy_constructible; +using cuda::std::is_trivially_copy_constructible; +using cuda::std::is_nothrow_copy_constructible; + +using cuda::std::is_move_constructible; +using cuda::std::is_trivially_move_constructible; +using cuda::std::is_nothrow_move_constructible; + +using cuda::std::is_assignable; +using cuda::std::is_trivially_assignable; +using cuda::std::is_nothrow_assignable; + +using cuda::std::is_copy_assignable; +using cuda::std::is_trivially_copy_assignable; +using cuda::std::is_nothrow_copy_assignable; + +using cuda::std::is_move_assignable; +using cuda::std::is_trivially_move_assignable; +using cuda::std::is_nothrow_move_assignable; + +using cuda::std::is_destructible; +using cuda::std::is_trivially_destructible; +using cuda::std::is_nothrow_destructible; + +using cuda::std::has_virtual_destructor; + +// Property Queries +using cuda::std::alignment_of; +using cuda::std::rank; +using cuda::std::extent; + +// Type Relationships +using cuda::std::is_same; +using cuda::std::is_base_of; +using cuda::std::is_convertible; + +// Const-volatility specifiers +using cuda::std::remove_cv; +using cuda::std::remove_cv_t; +using cuda::std::remove_const; +using cuda::std::remove_const_t; +using cuda::std::remove_volatile; +using cuda::std::remove_volatile_t; +using cuda::std::add_cv; +using cuda::std::add_cv_t; +using cuda::std::add_const; +using cuda::std::add_const_t; +using cuda::std::add_volatile; +using cuda::std::add_volatile_t; + +// References +using cuda::std::remove_reference; +using cuda::std::remove_reference_t;
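+// Example of what this unified namespace buys us (illustrative only;
+// clamp_positive is hypothetical and not part of this patch): the same
+// template compiles on the host against std:: and on the device against
+// cuda::std::, with no #ifdef at the point of use.
+//
+//   template <typename T>
+//   BOOST_MATH_GPU_ENABLED boost::math::enable_if_t<boost::math::is_floating_point<T>::value, T>
+//   clamp_positive(T x)
+//   {
+//      return x < T(0) ? T(0) : x;
+//   }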
+using cuda::std::add_lvalue_reference; +using cuda::std::add_lvalue_reference_t; +using cuda::std::add_rvalue_reference; +using cuda::std::add_rvalue_reference_t; + +// Pointers +using cuda::std::remove_pointer; +using cuda::std::remove_pointer_t; +using cuda::std::add_pointer; +using cuda::std::add_pointer_t; + +// Sign Modifiers +using cuda::std::make_signed; +using cuda::std::make_signed_t; +using cuda::std::make_unsigned; +using cuda::std::make_unsigned_t; + +// Arrays +using cuda::std::remove_extent; +using cuda::std::remove_extent_t; +using cuda::std::remove_all_extents; +using cuda::std::remove_all_extents_t; + +// Misc transformations +using cuda::std::decay; +using cuda::std::decay_t; +using cuda::std::enable_if; +using cuda::std::enable_if_t; +using cuda::std::conditional; +using cuda::std::conditional_t; +using cuda::std::common_type; +using cuda::std::common_type_t; +using cuda::std::underlying_type; +using cuda::std::underlying_type_t; + +#else // STD versions + +#include + +namespace boost { +namespace math { + +// Helper classes +using std::integral_constant; +using std::true_type; +using std::false_type; + +// Primary type categories +using std::is_void; +using std::is_null_pointer; +using std::is_integral; +using std::is_floating_point; +using std::is_array; +using std::is_enum; +using std::is_union; +using std::is_class; +using std::is_function; +using std::is_pointer; +using std::is_lvalue_reference; +using std::is_rvalue_reference; +using std::is_member_object_pointer; +using std::is_member_function_pointer; + +// Composite Type Categories +using std::is_fundamental; +using std::is_arithmetic; +using std::is_scalar; +using std::is_object; +using std::is_compound; +using std::is_reference; +using std::is_member_pointer; + +// Type properties +using std::is_const; +using std::is_volatile; +using std::is_trivial; +using std::is_trivially_copyable; +using std::is_standard_layout; +using std::is_empty; +using std::is_polymorphic; +using std::is_abstract; +using std::is_final; +using std::is_signed; +using std::is_unsigned; + +// Supported Operations +using std::is_constructible; +using std::is_trivially_constructible; +using std::is_nothrow_constructible; + +using std::is_default_constructible; +using std::is_trivially_default_constructible; +using std::is_nothrow_default_constructible; + +using std::is_copy_constructible; +using std::is_trivially_copy_constructible; +using std::is_nothrow_copy_constructible; + +using std::is_move_constructible; +using std::is_trivially_move_constructible; +using std::is_nothrow_move_constructible; + +using std::is_assignable; +using std::is_trivially_assignable; +using std::is_nothrow_assignable; + +using std::is_copy_assignable; +using std::is_trivially_copy_assignable; +using std::is_nothrow_copy_assignable; + +using std::is_move_assignable; +using std::is_trivially_move_assignable; +using std::is_nothrow_move_assignable; + +using std::is_destructible; +using std::is_trivially_destructible; +using std::is_nothrow_destructible; + +using std::has_virtual_destructor; + +// Property Queries +using std::alignment_of; +using std::rank; +using std::extent; + +// Type Relationships +using std::is_same; +using std::is_base_of; +using std::is_convertible; + +// Const-volatility specifiers +using std::remove_cv; +using std::remove_cv_t; +using std::remove_const; +using std::remove_const_t; +using std::remove_volatile; +using std::remove_volatile_t; +using std::add_cv; +using std::add_cv_t; +using std::add_const; +using std::add_const_t; +using 
std::add_volatile; +using std::add_volatile_t; + +// References +using std::remove_reference; +using std::remove_reference_t; +using std::add_lvalue_reference; +using std::add_lvalue_reference_t; +using std::add_rvalue_reference; +using std::add_rvalue_reference_t; + +// Pointers +using std::remove_pointer; +using std::remove_pointer_t; +using std::add_pointer; +using std::add_pointer_t; + +// Sign Modifiers +using std::make_signed; +using std::make_signed_t; +using std::make_unsigned; +using std::make_unsigned_t; + +// Arrays +using std::remove_extent; +using std::remove_extent_t; +using std::remove_all_extents; +using std::remove_all_extents_t; + +// Misc transformations +using std::decay; +using std::decay_t; +using std::enable_if; +using std::enable_if_t; +using std::conditional; +using std::conditional_t; +using std::common_type; +using std::common_type_t; +using std::underlying_type; +using std::underlying_type_t; + +#endif + +template +using bool_constant = boost::math::integral_constant; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_void_v = boost::math::is_void::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_null_pointer_v = boost::math::is_null_pointer::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_integral_v = boost::math::is_integral::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_floating_point_v = boost::math::is_floating_point::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_array_v = boost::math::is_array::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_enum_v = boost::math::is_enum::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_union_v = boost::math::is_union::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_class_v = boost::math::is_class::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_function_v = boost::math::is_function::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_pointer_v = boost::math::is_pointer::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_lvalue_reference_v = boost::math::is_lvalue_reference::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_rvalue_reference_v = boost::math::is_rvalue_reference::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_member_object_pointer_v = boost::math::is_member_object_pointer::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_member_function_pointer_v = boost::math::is_member_function_pointer::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_fundamental_v = boost::math::is_fundamental::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_arithmetic_v = boost::math::is_arithmetic::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_scalar_v = boost::math::is_scalar::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_object_v = boost::math::is_object::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_compound_v = boost::math::is_compound::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_reference_v = boost::math::is_reference::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_member_pointer_v = boost::math::is_member_pointer::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_const_v = boost::math::is_const::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_volatile_v = boost::math::is_volatile::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_trivial_v = boost::math::is_trivial::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_trivially_copyable_v = boost::math::is_trivially_copyable::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_standard_layout_v = 
boost::math::is_standard_layout::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_empty_v = boost::math::is_empty::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_polymorphic_v = boost::math::is_polymorphic::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_abstract_v = boost::math::is_abstract::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_final_v = boost::math::is_final::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_signed_v = boost::math::is_signed::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_unsigned_v = boost::math::is_unsigned::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_constructible_v = boost::math::is_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_trivially_constructible_v = boost::math::is_trivially_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_nothrow_constructible_v = boost::math::is_nothrow_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_default_constructible_v = boost::math::is_default_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_trivially_default_constructible_v = boost::math::is_trivially_default_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_nothrow_default_constructible_v = boost::math::is_nothrow_default_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_copy_constructible_v = boost::math::is_copy_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_trivially_copy_constructible_v = boost::math::is_trivially_copy_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_nothrow_copy_constructible_v = boost::math::is_nothrow_copy_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_move_constructible_v = boost::math::is_move_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_trivially_move_constructible_v = boost::math::is_trivially_move_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_nothrow_move_constructible_v = boost::math::is_nothrow_move_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_assignable_v = boost::math::is_assignable::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_trivially_assignable_v = boost::math::is_trivially_assignable::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_nothrow_assignable_v = boost::math::is_nothrow_assignable::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_copy_assignable_v = boost::math::is_copy_assignable::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_trivially_copy_assignable_v = boost::math::is_trivially_copy_assignable::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_nothrow_copy_assignable_v = boost::math::is_nothrow_copy_assignable::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_move_assignable_v = boost::math::is_move_assignable::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_trivially_move_assignable_v = boost::math::is_trivially_move_assignable::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_nothrow_move_assignable_v = boost::math::is_nothrow_move_assignable::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_destructible_v = boost::math::is_destructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_trivially_destructible_v = boost::math::is_trivially_destructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_nothrow_destructible_v = boost::math::is_nothrow_destructible::value; + +template 
+BOOST_MATH_INLINE_CONSTEXPR bool has_virtual_destructor_v = boost::math::has_virtual_destructor::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_same_v = boost::math::is_same::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_base_of_v = boost::math::is_base_of::value; + +} // namespace math +} // namespace boost + +#endif // BOOST_MATH_TOOLS_TYPE_TRAITS diff --git a/include/boost/math/tools/utility.hpp b/include/boost/math/tools/utility.hpp new file mode 100644 index 0000000000..3e22865780 --- /dev/null +++ b/include/boost/math/tools/utility.hpp @@ -0,0 +1,69 @@ +// Copyright (c) 2024 Matt Borland +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_TOOLS_UTILITY +#define BOOST_MATH_TOOLS_UTILITY + +#include + +#ifndef BOOST_MATH_HAS_GPU_SUPPORT + +#include + +namespace boost { +namespace math { + +template +constexpr T min BOOST_MATH_PREVENT_MACRO_SUBSTITUTION (const T& a, const T& b) +{ + return (std::min)(a, b); +} + +template +constexpr T max BOOST_MATH_PREVENT_MACRO_SUBSTITUTION (const T& a, const T& b) +{ + return (std::max)(a, b); +} + +template +void swap BOOST_MATH_PREVENT_MACRO_SUBSTITUTION (T& a, T& b) +{ + return (std::swap)(a, b); +} + +} // namespace math +} // namespace boost + +#else + +namespace boost { +namespace math { + +template +BOOST_MATH_GPU_ENABLED constexpr T min BOOST_MATH_PREVENT_MACRO_SUBSTITUTION (const T& a, const T& b) +{ + return a < b ? a : b; +} + +template +BOOST_MATH_GPU_ENABLED constexpr T max BOOST_MATH_PREVENT_MACRO_SUBSTITUTION (const T& a, const T& b) +{ + return a > b ? a : b; +} + +template +BOOST_MATH_GPU_ENABLED constexpr void swap BOOST_MATH_PREVENT_MACRO_SUBSTITUTION (T& a, T& b) +{ + T t(a); + a = b; + b = t; +} + +} // namespace math +} // namespace boost + +#endif // BOOST_MATH_HAS_GPU_SUPPORT + +#endif // BOOST_MATH_TOOLS_UTILITY diff --git a/include/boost/math/tools/workaround.hpp b/include/boost/math/tools/workaround.hpp index 9b15c4e930..7edd1c12aa 100644 --- a/include/boost/math/tools/workaround.hpp +++ b/include/boost/math/tools/workaround.hpp @@ -23,7 +23,7 @@ namespace boost{ namespace math{ namespace tools{ // std::fmod(1185.0L, 1.5L); // template -inline T fmod_workaround(T a, T b) BOOST_MATH_NOEXCEPT(T) +BOOST_MATH_GPU_ENABLED inline T fmod_workaround(T a, T b) BOOST_MATH_NOEXCEPT(T) { BOOST_MATH_STD_USING return fmod(a, b); diff --git a/include_private/boost/math/tools/remez.hpp b/include_private/boost/math/tools/remez.hpp index 8f817d7ce3..3fdd473969 100644 --- a/include_private/boost/math/tools/remez.hpp +++ b/include_private/boost/math/tools/remez.hpp @@ -10,7 +10,7 @@ #pragma once #endif -#include +#include "solve.hpp" #include #include #include diff --git a/include_private/boost/math/tools/test.hpp b/include_private/boost/math/tools/test.hpp index 10f6143e2b..7547ef5be2 100644 --- a/include_private/boost/math/tools/test.hpp +++ b/include_private/boost/math/tools/test.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -253,6 +254,7 @@ test_result test_hetero(const A& a, F1 test_func, F2 expect_func) return result; } +#ifndef BOOST_MATH_NO_EXCEPTIONS template void test_check_throw(Val, Exception) { @@ -293,6 +295,7 @@ void test_check_throw(Val v, boost::math::rounding_error const*) BOOST_CHECK((v == boost::math::tools::max_value()) || (v == -boost::math::tools::max_value())); } } +#endif } // namespace tools } // namespace math @@ -303,7 +306,9 @@ void test_check_throw(Val v, boost::math::rounding_error const*) // exception-free testing support, ideally we'd only define this in our tests, // but to keep things simple we really need it somewhere that's always included: // -#ifdef BOOST_NO_EXCEPTIONS +#if defined(BOOST_MATH_NO_EXCEPTIONS) && defined(BOOST_MATH_HAS_GPU_SUPPORT) +# define BOOST_MATH_CHECK_THROW(x, y) +#elif defined(BOOST_MATH_NO_EXCEPTIONS) # define BOOST_MATH_CHECK_THROW(x, ExceptionType) boost::math::tools::test_check_throw(x, static_cast(nullptr)); #else # define BOOST_MATH_CHECK_THROW(x, y) BOOST_CHECK_THROW(x, y) diff --git a/reporting/accuracy/Jamfile.v2 b/reporting/accuracy/Jamfile.v2 index a07e60083b..0d961e66c5 100644 --- a/reporting/accuracy/Jamfile.v2 +++ b/reporting/accuracy/Jamfile.v2 @@ -12,7 +12,8 @@ import testing ; import modules ; import path ; import pch ; -import ../../../config/checks/config : requires ; +import-search /boost/config/checks ; +import config : requires ; using quickbook ; using auto-index ; @@ -47,16 +48,16 @@ explicit has_gsl ; exe has_rmath : has_rmath.cpp Rmath ; explicit has_rmath ; -CEPHES_SOURCE = acosh.c airy.c asin.c asinh.c atan.c atanh.c bdtr.c beta.c -btdtr.c cbrt.c chbevl.c chdtr.c clog.c cmplx.c const.c -cosh.c dawsn.c drand.c ei.c ellie.c ellik.c ellpe.c ellpj.c ellpk.c -exp.c exp10.c exp2.c expn.c expx2.c fabs.c fac.c fdtr.c -fresnl.c gamma.c gdtr.c hyp2f1.c hyperg.c i0.c i1.c igami.c incbet.c -incbi.c igam.c isnan.c iv.c j0.c j1.c jn.c jv.c k0.c k1.c kn.c kolmogorov.c -log.c log2.c log10.c lrand.c nbdtr.c ndtr.c ndtri.c pdtr.c planck.c -polevl.c polmisc.c polylog.c polyn.c pow.c powi.c psi.c rgamma.c round.c -shichi.c sici.c sin.c sindg.c sinh.c spence.c stdtr.c struve.c -tan.c tandg.c tanh.c unity.c yn.c zeta.c zetac.c +CEPHES_SOURCE = acosh.c airy.c asin.c asinh.c atan.c atanh.c bdtr.c beta.c +btdtr.c cbrt.c chbevl.c chdtr.c clog.c cmplx.c const.c +cosh.c dawsn.c drand.c ei.c ellie.c ellik.c ellpe.c ellpj.c ellpk.c +exp.c exp10.c exp2.c expn.c expx2.c fabs.c fac.c fdtr.c +fresnl.c gamma.c gdtr.c hyp2f1.c hyperg.c i0.c i1.c igami.c incbet.c +incbi.c igam.c isnan.c iv.c j0.c j1.c jn.c jv.c k0.c k1.c kn.c kolmogorov.c +log.c log2.c log10.c lrand.c nbdtr.c ndtr.c ndtri.c pdtr.c planck.c +polevl.c polmisc.c polylog.c polyn.c pow.c powi.c psi.c rgamma.c round.c +shichi.c sici.c sin.c sindg.c sinh.c spence.c stdtr.c struve.c +tan.c tandg.c tanh.c unity.c yn.c zeta.c zetac.c sqrt.c floor.c setprec.c mtherr.c ; path-constant here : . 
; @@ -68,10 +69,10 @@ actions check_exists explicit $(here)/third_party/cephes_double/acosh.c ; lib cephes_double : $(here)/third_party/cephes_double/$(CEPHES_SOURCE) - : + : release static - [ check-target-builds $(here)/third_party/cephes_double/acosh.c : : no ] + [ check-target-builds $(here)/third_party/cephes_double/acosh.c : : no ] ; explicit cephes_double ; @@ -80,52 +81,52 @@ rule all-tests { local result ; for local source in [ glob test*.cpp ] { - result += [ run $(source) /boost/system /boost/filesystem /boost/test//boost_unit_test_framework - : : : - [ check-target-builds has_gsl : ALWAYS_TEST_DOUBLE : ] + result += [ run $(source) /boost/system//boost_system /boost/filesystem//boost_filesystem /boost/test//boost_unit_test_framework /boost/interprocess//boost_interprocess /boost/multiprecision//boost_multiprecision /boost/type_index//boost_type_index quadmath mpfr + : : : + [ check-target-builds has_gsl : ALWAYS_TEST_DOUBLE : ] linux:-lpthread linux:-lrt gcc:$(OTHERFLAGS) ] ; - result += [ run $(source) /boost/system /boost/filesystem /boost/test//boost_unit_test_framework - : : : [ check-target-builds has_cxx17_cmath : TEST_CXX17_CMATH : no ] + result += [ run $(source) /boost/system//boost_system /boost/filesystem//boost_filesystem /boost/test//boost_unit_test_framework /boost/interprocess//boost_interprocess /boost/multiprecision//boost_multiprecision /boost/type_index//boost_type_index quadmath mpfr + : : : [ check-target-builds has_cxx17_cmath : TEST_CXX17_CMATH : no ] linux:-lpthread linux:-lrt gcc:$(OTHERFLAGS) - : $(source:B)_cxx17_cmath ] + : $(source:B)_cxx17_cmath ] ; - result += [ run $(source) /boost/system /boost/filesystem /boost/test//boost_unit_test_framework - : : : [ check-target-builds has_c99_cmath : TEST_C99 : no ] + result += [ run $(source) /boost/system//boost_system /boost/filesystem//boost_filesystem /boost/test//boost_unit_test_framework /boost/interprocess//boost_interprocess /boost/multiprecision//boost_multiprecision /boost/type_index//boost_type_index quadmath mpfr + : : : [ check-target-builds has_c99_cmath : TEST_C99 : no ] linux:-lpthread linux:-lrt gcc:$(OTHERFLAGS) - : $(source:B)_c99 ] + : $(source:B)_c99 ] ; - result += [ run $(source) /boost/system /boost/filesystem /boost/test//boost_unit_test_framework gsl gslcblas - : : : [ check-target-builds has_gsl : TEST_GSL : no ] + result += [ run $(source) /boost/system//boost_system /boost/filesystem//boost_filesystem /boost/test//boost_unit_test_framework /boost/interprocess//boost_interprocess /boost/multiprecision//boost_multiprecision /boost/type_index//boost_type_index gsl gslcblas + : : : [ check-target-builds has_gsl : TEST_GSL : no ] linux:-lpthread linux:-lrt gcc:$(OTHERFLAGS) - : $(source:B)_gsl ] + : $(source:B)_gsl ] ; - result += [ run $(source) /boost/system /boost/filesystem /boost/test//boost_unit_test_framework Rmath - : : : [ check-target-builds has_rmath : TEST_RMATH : no ] + result += [ run $(source) /boost/system//boost_system /boost/filesystem//boost_filesystem /boost/test//boost_unit_test_framework /boost/interprocess//boost_interprocess /boost/multiprecision//boost_multiprecision /boost/type_index//boost_type_index Rmath + : : : [ check-target-builds has_rmath : TEST_RMATH : no ] linux:-lpthread linux:-lrt gcc:$(OTHERFLAGS) - : $(source:B)_rmath ] + : $(source:B)_rmath ] ; - result += [ run $(source) /boost/system /boost/filesystem /boost/test//boost_unit_test_framework cephes_double - : : : [ check-target-builds $(here)/third_party/cephes_double/acosh.c : TEST_CEPHES 
cephes_double : no ] + result += [ run $(source) /boost/system//boost_system /boost/filesystem//boost_filesystem /boost/test//boost_unit_test_framework /boost/interprocess//boost_interprocess /boost/multiprecision//boost_multiprecision /boost/type_index//boost_type_index cephes_double + : : : [ check-target-builds $(here)/third_party/cephes_double/acosh.c : TEST_CEPHES cephes_double : no ] linux:-lpthread linux:-lrt gcc:$(OTHERFLAGS) - : $(source:B)_cephes ] + : $(source:B)_cephes ] ; } - return $(result) ; + return $(result) ; } - + test-suite report_gen : [ all-tests ] ; path-constant images_location : html ; @@ -138,7 +139,7 @@ boostbook standalone : # Path for links to Boost: boost.root=../../../../.. - + # Some general style settings: table.footnote.number.format=1 footnote.number.format=1 @@ -159,25 +160,25 @@ boostbook standalone generate.section.toc.level=10 ; -lib gmp ; -lib mpfr ; -lib quadmath ; +searched-lib gmp : : shared ; +searched-lib mpfr : : shared ; +searched-lib quadmath : : shared ; # # Some manual tests that are expensive to run: # -run erf_error_plot.cpp mpfr gmp : : : release 17 [ check-target-builds ../../config//has_mpfr : : no ] : erf_error_plot_double ; +run erf_error_plot.cpp /boost/multiprecision//boost_multiprecision mpfr gmp : : : release 17 [ check-target-builds ../../config//has_mpfr : : no ] : erf_error_plot_double ; explicit erf_error_plot_double ; -run erf_error_plot.cpp mpfr gmp : : : release 17 TEST_TYPE="\"long double\"" [ check-target-builds ../../config//has_mpfr : : no ] : erf_error_plot_long_double ; +run erf_error_plot.cpp /boost/multiprecision//boost_multiprecision mpfr gmp : : : release 17 TEST_TYPE="\"long double\"" [ check-target-builds ../../config//has_mpfr : : no ] : erf_error_plot_long_double ; explicit erf_error_plot_long_double ; -run erf_error_plot.cpp mpfr gmp : : : release 17 TEST_TYPE=cpp_bin_float_50 [ check-target-builds ../../config//has_mpfr : : no ] : erf_error_plot_cpp_bin_float_50 ; +run erf_error_plot.cpp /boost/multiprecision//boost_multiprecision mpfr gmp : : : release 17 TEST_TYPE=cpp_bin_float_50 [ check-target-builds ../../config//has_mpfr : : no ] : erf_error_plot_cpp_bin_float_50 ; explicit erf_error_plot_cpp_bin_float_50 ; -run erf_error_plot.cpp mpfr gmp quadmath : : : release 17 gnu TEST_TYPE=float128 [ check-target-builds ../../config//has_mpfr : : no ] : erf_error_plot_float128 ; +run erf_error_plot.cpp /boost/multiprecision//boost_multiprecision mpfr gmp quadmath : : : release 17 gnu TEST_TYPE=float128 [ check-target-builds ../../config//has_mpfr : : no ] : erf_error_plot_float128 ; explicit erf_error_plot_cpp_bin_float_50 ; -run erfc_error_plot.cpp mpfr gmp : : : release 17 [ check-target-builds ../../config//has_mpfr : : no ] : erfc_error_plot_double ; +run erfc_error_plot.cpp /boost/multiprecision//boost_multiprecision mpfr gmp : : : release 17 [ check-target-builds ../../config//has_mpfr : : no ] : erfc_error_plot_double ; explicit erfc_error_plot_double ; -run erfc_error_plot.cpp mpfr gmp : : : release 17 TEST_TYPE="\"long double\"" [ check-target-builds ../../config//has_mpfr : : no ] : erfc_error_plot_long_double ; +run erfc_error_plot.cpp /boost/multiprecision//boost_multiprecision mpfr gmp : : : release 17 TEST_TYPE="\"long double\"" [ check-target-builds ../../config//has_mpfr : : no ] : erfc_error_plot_long_double ; explicit erfc_error_plot_long_double ; -run erfc_error_plot.cpp mpfr gmp : : : release 17 TEST_TYPE=cpp_bin_float_50 [ check-target-builds ../../config//has_mpfr : : no ] : 
erfc_error_plot_cpp_bin_float_50 ;
+run erfc_error_plot.cpp /boost/multiprecision//boost_multiprecision mpfr gmp : : : <variant>release <cxxstd>17 <define>TEST_TYPE=cpp_bin_float_50 [ check-target-builds ../../config//has_mpfr : : <build>no ] : erfc_error_plot_cpp_bin_float_50 ;
 explicit erfc_error_plot_cpp_bin_float_50 ;
-run erfc_error_plot.cpp mpfr gmp quadmath : : : <variant>release <cxxstd>17 <cxxstd-dialect>gnu <define>TEST_TYPE=float128 [ check-target-builds ../../config//has_mpfr : : <build>no ] : erfc_error_plot_float128 ;
+run erfc_error_plot.cpp /boost/multiprecision//boost_multiprecision mpfr gmp quadmath : : : <variant>release <cxxstd>17 <cxxstd-dialect>gnu <define>TEST_TYPE=float128 [ check-target-builds ../../config//has_mpfr : : <build>no ] : erfc_error_plot_float128 ;
 explicit erfc_error_plot_cpp_bin_float_50 ;
diff --git a/reporting/performance/Jamfile.v2 b/reporting/performance/Jamfile.v2
index 1f1d6fd639..0a960e9236 100644
--- a/reporting/performance/Jamfile.v2
+++ b/reporting/performance/Jamfile.v2
@@ -12,11 +12,13 @@ import testing ;
 import modules ;
 import path ;
 import pch ;
-import ../../../config/checks/config : requires ;
+import-search /boost/config/checks ;
+import config : requires ;
 
 using quickbook ;
 using auto-index ;
-import ../../../predef/tools/check/predef
+import-search /boost/predef/tools/check ;
+import predef
     : check require
     : predef-check predef-require ;
@@ -39,9 +41,9 @@ if $(is_unix)
 #
 # Configuration first:
 #
-lib gsl ;
-lib gslcblas ;
-lib Rmath ;
+searched-lib gsl ;
+searched-lib gslcblas ;
+searched-lib Rmath ;
 obj has_libstdcxx_tr1 : ../accuracy/has_libstdcxx_tr1.cpp ;
 explicit has_libstdcxx_tr1 ;
 obj has_c99_cmath : has_c99_cmath.cpp ;
@@ -53,16 +55,16 @@ explicit has_rmath ;
 obj is_intel_win : is_intel_win.cpp ;
 explicit is_intel_win ;
 
-CEPHES_SOURCE = acosh.c airy.c asin.c asinh.c atan.c atanh.c bdtr.c beta.c
-btdtr.c cbrt.c chbevl.c chdtr.c clog.c cmplx.c const.c
-cosh.c dawsn.c drand.c ei.c ellie.c ellik.c ellpe.c ellpj.c ellpk.c
-exp.c exp10.c exp2.c expn.c expx2.c fabs.c fac.c fdtr.c
-fresnl.c gamma.c gdtr.c hyp2f1.c hyperg.c i0.c i1.c igami.c incbet.c
-incbi.c igam.c isnan.c iv.c j0.c j1.c jn.c jv.c k0.c k1.c kn.c kolmogorov.c
-log.c log2.c log10.c lrand.c nbdtr.c ndtr.c ndtri.c pdtr.c planck.c
-polevl.c polmisc.c polylog.c polyn.c pow.c powi.c psi.c rgamma.c round.c
-shichi.c sici.c sin.c sindg.c sinh.c spence.c stdtr.c struve.c
-tan.c tandg.c tanh.c unity.c yn.c zeta.c zetac.c
+CEPHES_SOURCE = acosh.c airy.c asin.c asinh.c atan.c atanh.c bdtr.c beta.c
+btdtr.c cbrt.c chbevl.c chdtr.c clog.c cmplx.c const.c
+cosh.c dawsn.c drand.c ei.c ellie.c ellik.c ellpe.c ellpj.c ellpk.c
+exp.c exp10.c exp2.c expn.c expx2.c fabs.c fac.c fdtr.c
+fresnl.c gamma.c gdtr.c hyp2f1.c hyperg.c i0.c i1.c igami.c incbet.c
+incbi.c igam.c isnan.c iv.c j0.c j1.c jn.c jv.c k0.c k1.c kn.c kolmogorov.c
+log.c log2.c log10.c lrand.c nbdtr.c ndtr.c ndtri.c pdtr.c planck.c
+polevl.c polmisc.c polylog.c polyn.c pow.c powi.c psi.c rgamma.c round.c
+shichi.c sici.c sin.c sindg.c sinh.c spence.c stdtr.c struve.c
+tan.c tandg.c tanh.c unity.c yn.c zeta.c zetac.c
 sqrt.c floor.c setprec.c mtherr.c ;
 
 DCDFLIB_SOURCE = dcdflib.c ipmpar.c ;
@@ -78,33 +80,33 @@ explicit $(here)/third_party/cephes_double/acosh.c ;
 explicit $(here)/third_party/dcdflib/dcdflib.c ;
 
 lib cephes_double : $(here)/third_party/cephes_double/$(CEPHES_SOURCE)
-   :
+   :
    <variant>release <link>static
-   [ check-target-builds $(here)/third_party/cephes_double/acosh.c : : <build>no ]
+   [ check-target-builds $(here)/third_party/cephes_double/acosh.c : : <build>no ]
    ;
 explicit cephes_double ;
 
 lib dcdflib : $(here)/third_party/dcdflib/$(DCDFLIB_SOURCE)
-   :
+   :
    <variant>release <link>static
-   [ check-target-builds $(here)/third_party/dcdflib/dcdflib.c : : <build>no ]
+   [ check-target-builds $(here)/third_party/dcdflib/dcdflib.c : : <build>no ]
    ;
 explicit dcdflib ;
-obj table_helper : table_helper.cpp ;
+obj table_helper : table_helper.cpp /boost/filesystem//boost_filesystem /boost/interprocess//boost_interprocess ;
 
 rule all-tests {
    local result ;
    for local source in [ glob test*.cpp ]
    {
-      result += [ run $(source) /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
-        : : : <variant>release <include>../../test
-        [ check-target-builds ../accuracy//has_c99_cmath : <define>TEST_C99 ]
-        [ check-target-builds has_libstdcxx_tr1 : <define>TEST_LIBSTDCXX ]
+      result += [ run $(source) /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem /boost/multiprecision//boost_multiprecision /boost/filesystem//boost_filesystem table_helper
+        : : : <variant>release <include>../../test
+        [ check-target-builds ../accuracy//has_c99_cmath : <define>TEST_C99 ]
+        [ check-target-builds has_libstdcxx_tr1 : <define>TEST_LIBSTDCXX ]
         [ check-target-builds ../accuracy//has_gsl : <define>TEST_GSL <library>gsl <library>gslcblas ]
         [ check-target-builds ../accuracy//has_rmath : <define>TEST_RMATH <library>Rmath ]
        # [ check-target-builds is_intel_win : <build>no : ]
@@ -113,46 +115,46 @@ rule all-tests {
        #<toolset>msvc:<address-model>64
       ] ;
    }
-   return $(result) ;
+   return $(result) ;
 }
-
+
 #
 # Special cases to test different compiler options,
 # cbrt first as an example of a trivial function:
 #
-run test_cbrt.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_cbrt.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>debug <define>COMPILER_COMPARISON_TABLES [ predef-require "BOOST_COMP_MSVC" ] <address-model>32 : test_cbrt_msvc_debug ;
-run test_cbrt.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_cbrt.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>release <define>COMPILER_COMPARISON_TABLES [ predef-require "BOOST_COMP_MSVC" ] <cxxflags>-Ox <address-model>32 : test_cbrt_msvc_release_32 ;
-run test_cbrt.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_cbrt.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>release <define>COMPILER_COMPARISON_TABLES [ predef-require "BOOST_COMP_MSVC" ] <cxxflags>-Ox <address-model>64 : test_cbrt_msvc_release_64 ;
-run test_cbrt.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_cbrt.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>release <define>COMPILER_COMPARISON_TABLES [ check-target-builds is_intel_win : : <build>no ] <toolset>intel:<cxxflags>-Ox <address-model>64 : test_cbrt_intel_release ;
 #
 # Now jn as a little more complex:
 #
-run test_jn.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_jn.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>debug <define>COMPILER_COMPARISON_TABLES <include>../../test [ predef-require "BOOST_COMP_MSVC" ] <address-model>32 : test_jn_msvc_debug ;
-run test_jn.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_jn.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>release <define>COMPILER_COMPARISON_TABLES <include>../../test [ predef-require "BOOST_COMP_MSVC" ] <cxxflags>-Ox <address-model>32 : test_jn_msvc_release_32 ;
-run test_jn.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_jn.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>release <define>COMPILER_COMPARISON_TABLES <include>../../test [ predef-require "BOOST_COMP_MSVC" ] <cxxflags>-Ox <address-model>64 : test_jn_msvc_release_64 ;
-run test_jn.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_jn.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>release <define>COMPILER_COMPARISON_TABLES <include>../../test [ check-target-builds is_intel_win : : <build>no ] <address-model>64 : test_jn_intel_release ;
 #
 # Then something really expensive, like the inverse-incomplete-beta:
 #
-run test_ibeta_inv.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_ibeta_inv.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>debug <define>COMPILER_COMPARISON_TABLES <include>../../test [ predef-require "BOOST_COMP_MSVC" ] <address-model>32 : test_ibeta_inv_msvc_debug ;
-run test_ibeta_inv.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_ibeta_inv.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>release <define>COMPILER_COMPARISON_TABLES <include>../../test [ predef-require "BOOST_COMP_MSVC" ] <cxxflags>-Ox <address-model>32 : test_ibeta_inv_msvc_release_32 ;
-run test_ibeta_inv.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_ibeta_inv.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>release <define>COMPILER_COMPARISON_TABLES <include>../../test [ predef-require "BOOST_COMP_MSVC" ] <cxxflags>-Ox <address-model>64 : test_ibeta_inv_msvc_release_64 ;
-run test_ibeta_inv.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_ibeta_inv.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>release <define>COMPILER_COMPARISON_TABLES <include>../../test [ check-target-builds is_intel_win : : <build>no ] <toolset>intel:<cxxflags>-Ox <address-model>64 : test_ibeta_inv_intel_release ;
 
-test-suite report_gen : [ all-tests ] test_cbrt_msvc_debug test_cbrt_msvc_release_32 test_cbrt_msvc_release_64 test_cbrt_intel_release
-   test_jn_msvc_debug test_jn_msvc_release_32 test_jn_msvc_release_64 test_jn_intel_release test_ibeta_inv_msvc_debug
+test-suite report_gen : [ all-tests ] test_cbrt_msvc_debug test_cbrt_msvc_release_32 test_cbrt_msvc_release_64 test_cbrt_intel_release
+   test_jn_msvc_debug test_jn_msvc_release_32 test_jn_msvc_release_64 test_jn_intel_release test_ibeta_inv_msvc_debug
    test_ibeta_inv_msvc_release_32 test_ibeta_inv_msvc_release_64 test_ibeta_inv_intel_release ;
 
 path-constant images_location : html ;
@@ -165,7 +167,7 @@ boostbook standalone
    :
    # Path for links to Boost:
    <xsl:param>boost.root=../../../../..
-   
+
    # Some general style settings:
    <xsl:param>table.footnote.number.format=1
    <xsl:param>footnote.number.format=1
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index e63471c891..95d7849f6b 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -2,4 +2,54 @@
 # Distributed under the Boost Software License, Version 1.0.
 # https://www.boost.org/LICENSE_1_0.txt
 
-add_subdirectory(compile_test)
+include(BoostTestJamfile OPTIONAL RESULT_VARIABLE HAVE_BOOST_TEST)
+
+if(HAVE_BOOST_TEST)
+
+  boost_test(SOURCES check_cmake_version.cpp ARGUMENTS ${PROJECT_VERSION} LINK_LIBRARIES Boost::core Boost::config)
+
+  if (BOOST_MATH_ENABLE_CUDA)
+
+    message(STATUS "Building boost.math with CUDA")
+
+    find_package(CUDA REQUIRED)
+    enable_language(CUDA)
+    set(CMAKE_CUDA_EXTENSIONS OFF)
+
+    enable_testing()
+
+    boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::math Boost::assert Boost::concept_check Boost::config Boost::core Boost::integer Boost::lexical_cast Boost::multiprecision Boost::predef Boost::random Boost::static_assert Boost::throw_exception Boost::unit_test_framework ${CUDA_LIBRARIES} INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} )
+
+  elseif (BOOST_MATH_ENABLE_NVRTC)
+
+    message(STATUS "Building boost.math with NVRTC")
+
+    find_package(CUDA REQUIRED)
+
+    enable_testing()
+
+    set(CUDA_nvrtc_LIBRARY /usr/local/cuda/lib64/libnvrtc.so)
+
+    if (BOOST_MATH_NVRTC_CI_RUN)
+      boost_test_jamfile(FILE nvrtc_jamfile LINK_LIBRARIES Boost::math Boost::assert Boost::concept_check Boost::config Boost::core Boost::integer Boost::lexical_cast Boost::multiprecision Boost::predef Boost::random Boost::static_assert Boost::throw_exception ${CUDA_nvrtc_LIBRARY} ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} COMPILE_DEFINITIONS BOOST_MATH_NVRTC_CI_RUN=1 INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} )
+    else ()
+      boost_test_jamfile(FILE nvrtc_jamfile LINK_LIBRARIES Boost::math Boost::assert Boost::concept_check Boost::config Boost::core Boost::integer Boost::lexical_cast Boost::multiprecision Boost::predef Boost::random Boost::static_assert Boost::throw_exception ${CUDA_nvrtc_LIBRARY} ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} )
+    endif()
+
+  elseif (BOOST_MATH_ENABLE_SYCL)
+
+    message(STATUS "Building boost.math with SYCL")
+
+    set(CMAKE_CXX_COMPILER "icpx")
+    set(CMAKE_C_COMPILER "icx")
+
+    enable_testing()
+
+    boost_test_jamfile(FILE sycl_jamfile LINK_LIBRARIES Boost::math Boost::assert Boost::concept_check Boost::config Boost::core Boost::integer Boost::lexical_cast Boost::multiprecision Boost::predef Boost::random Boost::static_assert Boost::throw_exception sycl COMPILE_OPTIONS -fsycl )
+
+  else()
+
+    add_subdirectory(compile_test)
+
+  endif()
+
+endif()
diff --git a/test/Jamfile.v2 b/test/Jamfile.v2
index c7eaa3b1eb..4adb29d160 100644
--- a/test/Jamfile.v2
+++ b/test/Jamfile.v2
@@ -12,7 +12,8 @@ import testing ;
 import modules ;
 import path ;
 import pch ;
-import ../../config/checks/config : requires ;
+import-search /boost/config/checks ;
+import config : requires ;
 
 local ntl-path = [ modules.peek : NTL_PATH ] ;
 local gmp_path = [ modules.peek : GMP_PATH ] ;
@@ -29,11 +30,16 @@ if $(remove-test-targets)
    OBJ_REMOVAL_OPTIONS = off ;
 }
 
-obj no_eh : noeh_support.cpp ;
+obj no_eh : noeh_support.cpp : /boost/config//boost_config ;
 
 project : requirements
+   <library>/boost/math//boost_math
+   <library>/boost/multiprecision//boost_multiprecision
+   <library>/boost/test//included
+   <library>/boost/type_index//boost_type_index
+   <library>/boost/ublas//boost_ublas
    $(OBJ_REMOVAL_OPTIONS)
    <toolset>acc:<cxxflags>+W2068,2461,2236,4070,4069
   <toolset>intel-win:<cxxflags>-nologo
@@ -59,7 +65,6 @@ project
    <toolset>borland:<link>static
    # <toolset>msvc:<cxxflags>/wd4506 has no effect?
    # suppress xstring(237) : warning C4506: no definition for inline function
-   <include>../../..
    <exception-handling>off:<source>no_eh
    <link>shared:<define>BOOST_REGEX_DYN_LINK=1
    # For simplicities sake, make everything a static lib:
@@ -84,178 +89,193 @@ if $(ntl-path)
 }
 else
 {
-   lib ntl ;
+   searched-lib ntl ;
 }
 explicit ntl ;
 
-cpp-pch pch : pch.hpp : ../../test/build//boost_unit_test_framework ;
-cpp-pch pch_light : pch_light.hpp : ../../test/build//boost_unit_test_framework ;
+cpp-pch pch : pch.hpp : /boost/test//boost_unit_test_framework ;
+cpp-pch pch_light : pch_light.hpp : /boost/test//boost_unit_test_framework ;
 lib compile_test_main : compile_test/main.cpp ;
 
+searched-lib quadmath ;
+
+local float128_type_intel_quad =
+   [ check-target-builds ../config//has_intel_quad "Intel _Quad datatype support"
+       : <cxxflags>-Qoption,cpp,--extended_float_type <define>BOOST_MATH_USE_FLOAT128 ] ;
+local float128_type_gcc =
+   [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support"
+       : <toolset>gcc:<library>quadmath <toolset>gcc:<define>BOOST_MATH_TEST_FLOAT128 ] ;
+local float128_type_floatmax =
+   [ check-target-builds ../config//has_128bit_floatmax_t "128-bit floatmax_t" : : <build>no ] ;
+local float128_type =
+   $(float128_type_intel_quad) $(float128_type_gcc) $(float128_type_floatmax) ;
+
 test-suite special_fun :
-   [ run test_1F0.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=1 : test_1F0_1 ] # hypergeometric_pFq_checked_series.hpp uses auto, the rest are from quadrature tests.
-   [ run test_1F0.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 : test_1F0_2 ] # hypergeometric_pFq_checked_series.hpp uses auto, the rest are from quadrature tests.
-   [ run test_1F0.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=3 : test_1F0_3 ] # hypergeometric_pFq_checked_series.hpp uses auto, the rest are from quadrature tests.
-   [ run test_2F0.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] <define>TEST=1 : test_2F0_1 ]
-   [ run test_2F0.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] <define>TEST=2 : test_2F0_2 ]
-   [ run test_2F0.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] <define>TEST=3 : test_2F0_3 ]
-   [ run test_2F0.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] <define>TEST=4 : test_2F0_4 ]
+   [ run test_1F0.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=1 : test_1F0_1 ] # hypergeometric_pFq_checked_series.hpp uses auto, the rest are from quadrature tests.
+   [ run test_1F0.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 : test_1F0_2 ] # hypergeometric_pFq_checked_series.hpp uses auto, the rest are from quadrature tests.
+   [ run test_1F0.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=3 : test_1F0_3 ] # hypergeometric_pFq_checked_series.hpp uses auto, the rest are from quadrature tests.
+   [ run test_2F0.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] $(float128_type) <define>TEST=1 : test_2F0_1 ]
+   [ run test_2F0.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] $(float128_type) <define>TEST=2 : test_2F0_2 ]
+   [ run test_2F0.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] $(float128_type) <define>TEST=3 : test_2F0_3 ]
+   [ run test_2F0.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] $(float128_type) <define>TEST=4 : test_2F0_4 ]
 
-   [ run test_0F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=1 : test_0F1_1 ]
-   [ run test_0F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 : test_0F1_2 ]
+   [ run test_0F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=1 : test_0F1_1 ]
+   [ run test_0F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 : test_0F1_2 ]
 
-   [ run test_1F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=1 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_integrals ]
-   [ run test_1F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_float ]
-   [ run test_1F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=3 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_double ]
-   [ run test_1F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=4 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_long_double ]
+   [ run test_1F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=1 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_integrals ]
+   [ run test_1F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_float ]
+   [ run test_1F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=3 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_double ]
+   [ run test_1F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=4 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_long_double ]
 
-   [ run test_1F1_regularized.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_regularized_float ]
-   [ run test_1F1_regularized.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=3 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_regularized_double ]
-   [ run test_1F1_regularized.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=4 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_regularized_long_double ]
-   [ run test_1F1_regularized.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=5 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_regularized_real_concept ]
+   [ run test_1F1_regularized.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_regularized_float ]
+   [ run test_1F1_regularized.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=3 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_regularized_double ]
+   [ run test_1F1_regularized.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=4 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_regularized_long_double ]
+   [ run test_1F1_regularized.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=5 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_regularized_real_concept ]
 
    # These are slow...
-   [ run test_1F1_log.cpp ../../test/build//boost_unit_test_framework : : : <variant>release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_log_float ]
-   [ run test_1F1_log.cpp ../../test/build//boost_unit_test_framework : : : <variant>release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=3 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_log_double ]
-   [ run test_1F1_log.cpp ../../test/build//boost_unit_test_framework : : : <variant>release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=4 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_log_long_double ]
-   [ run test_1F1_log.cpp ../../test/build//boost_unit_test_framework : : : <variant>release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=5 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_log_real_concept ]
+   [ run test_1F1_log.cpp /boost/test//boost_unit_test_framework : : : <variant>release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_log_float ]
+   [ run test_1F1_log.cpp /boost/test//boost_unit_test_framework : : : <variant>release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=3 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_log_double ]
+   [ run test_1F1_log.cpp /boost/test//boost_unit_test_framework : : : <variant>release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=4 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_log_long_double ]
+   [ run test_1F1_log.cpp /boost/test//boost_unit_test_framework : : : <variant>release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=5 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_log_real_concept ]
 
    # pFq:
-   [ run test_pFq.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_pFq_float ]
-   [ run test_pFq.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=3 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_pFq_double ]
-   [ run test_pFq.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=4 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_pFq_long_double ]
-   [ run test_pFq.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=5 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_pFq_real_concept ]
+   [ run test_pFq.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_pFq_float ]
+   [ run test_pFq.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=3 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_pFq_double ]
+   [ run test_pFq.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=4 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_pFq_long_double ]
+   [ run test_pFq.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=5 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_pFq_real_concept ]
 
-   [ run hypot_test.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run pow_test.cpp ../../test/build//boost_unit_test_framework ]
-   [ run logaddexp_test.cpp ../../test/build//boost_unit_test_framework ]
-   [ run logsumexp_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_variadic_templates ] ]
-   [ run ccmath_sqrt_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_isinf_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_isnan_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_abs_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_isfinite_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_isnormal_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_fpclassify_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_frexp_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_ldexp_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_div_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_logb_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_ilogb_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_scalbn_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_scalbln_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_floor_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_ceil_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_trunc_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_modf_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_round_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_fmod_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_remainder_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_copysign_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_hypot_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_fdim_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_fmax_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_fmin_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_isgreater_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_isgreaterequal_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_isless_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_islessequal_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_isunordered_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_next_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_fma_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_signbit_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run log1p_expm1_test.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run powm1_sqrtp1m1_test.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run git_issue_705.cpp ../../test/build//boost_unit_test_framework ]
-   [ run git_issue_810.cpp ../../test/build//boost_unit_test_framework ]
-   [ run git_issue_826.cpp ../../test/build//boost_unit_test_framework ]
+   [ run hypot_test.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run pow_test.cpp /boost/test//boost_unit_test_framework ]
+   [ run logaddexp_test.cpp /boost/test//boost_unit_test_framework ]
+   [ run logsumexp_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_variadic_templates ] ]
+   [ run ccmath_sqrt_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_isinf_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_isnan_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_abs_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_isfinite_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_isnormal_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_fpclassify_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_frexp_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_ldexp_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_div_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_logb_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_ilogb_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_scalbn_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_scalbln_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_floor_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_ceil_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_trunc_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_modf_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_round_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_fmod_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_remainder_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_copysign_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_hypot_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_fdim_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_fmax_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_fmin_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_isgreater_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_isgreaterequal_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_isless_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_islessequal_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_isunordered_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_next_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_fma_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_signbit_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run log1p_expm1_test.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run powm1_sqrtp1m1_test.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run git_issue_705.cpp /boost/test//boost_unit_test_framework ]
+   [ run git_issue_810.cpp /boost/test//boost_unit_test_framework ]
+   [ run git_issue_826.cpp /boost/test//boost_unit_test_framework ]
    [ run git_issue_961.cpp ]
    [ run git_issue_1006.cpp ]
    [ run git_issue_184.cpp ]
    [ run git_issue_1137.cpp ]
    [ run git_issue_1139.cpp ]
-   [ run special_functions_test.cpp ../../test/build//boost_unit_test_framework ]
-   [ run test_airy.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_bessel_j.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_bessel_y.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_bessel_i.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_bessel_k.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_bessel_j_prime.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_bessel_y_prime.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_bessel_i_prime.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_bessel_k_prime.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run bessel_iterator_test.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_beta.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_bessel_airy_zeros.cpp ../../test/build//boost_unit_test_framework ]
-   [ run test_bernoulli_constants.cpp ../../test/build//boost_unit_test_framework ]
-   [ run test_binomial_coeff.cpp pch ../../test/build//boost_unit_test_framework ]
-   [ run test_carlson.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run git_issue_1175.cpp ]
+   [ run git_issue_1194.cpp ]
+   [ run special_functions_test.cpp /boost/test//boost_unit_test_framework ]
+   [ run test_airy.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_bessel_j.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_bessel_y.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_bessel_i.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_bessel_k.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_bessel_j_prime.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_bessel_y_prime.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_bessel_i_prime.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_bessel_k_prime.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run bessel_iterator_test.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_beta.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_bessel_airy_zeros.cpp /boost/test//boost_unit_test_framework ]
+   [ run test_bernoulli_constants.cpp /boost/test//boost_unit_test_framework ]
+   [ run test_binomial_coeff.cpp pch /boost/test//boost_unit_test_framework ]
+   [ run test_carlson.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
     : # command line
    : # input files
    : # requirements
    <define>TEST1
    : test_carlson_1 ]
-   [ run test_carlson.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_carlson.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST2
    : test_carlson_2 ]
-   [ run test_carlson.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_carlson.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST3
    : test_carlson_3 ]
-   [ run test_carlson.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_carlson.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST4
    : test_carlson_4 ]
-   [ run test_cbrt.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_difference.cpp ../../test/build//boost_unit_test_framework ]
-   [ run test_digamma.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_ellint_1.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_ellint_2.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_ellint_3.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_ellint_d.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_jacobi_theta.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] ]
-   [ run test_jacobi_zeta.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_heuman_lambda.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_erf.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
+   [ run test_cbrt.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_difference.cpp /boost/test//boost_unit_test_framework ]
+   [ run test_digamma.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_ellint_1.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_ellint_2.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_ellint_3.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_ellint_d.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_jacobi_theta.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] ]
+   [ run test_jacobi_zeta.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_heuman_lambda.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_erf.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
    [ run erf_limits_test.cpp ]
-   [ run test_expint.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_factorials.cpp pch ../../test/build//boost_unit_test_framework ]
-   [ run test_gamma.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
+   [ run test_expint.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_factorials.cpp pch /boost/test//boost_unit_test_framework ]
+   [ run test_gamma.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
    [ run test_gamma_edge.cpp ]
-   [ run test_gamma_mp.cpp ../../test/build//boost_unit_test_framework : : : <variant>release <define>TEST=1 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : <build>no ] : test_gamma_mp_1 ]
-   [ run test_gamma_mp.cpp ../../test/build//boost_unit_test_framework : : : <variant>release <define>TEST=2 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : <build>no ] : test_gamma_mp_2 ]
-   [ run test_gamma_mp.cpp ../../test/build//boost_unit_test_framework : : : <variant>release <define>TEST=3 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : <build>no ] : test_gamma_mp_3 ]
-   [ run test_hankel.cpp ../../test/build//boost_unit_test_framework ]
-   [ run test_hermite.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_ibeta.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_gamma_mp.cpp /boost/test//boost_unit_test_framework : : : <variant>release <define>TEST=1 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : <build>no ] : test_gamma_mp_1 ]
+   [ run test_gamma_mp.cpp /boost/test//boost_unit_test_framework : : : <variant>release <define>TEST=2 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : <build>no ] : test_gamma_mp_2 ]
+   [ run test_gamma_mp.cpp /boost/test//boost_unit_test_framework : : : <variant>release <define>TEST=3 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : <build>no ] : test_gamma_mp_3 ]
+   [ run test_hankel.cpp /boost/test//boost_unit_test_framework ]
+   [ run test_hermite.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_ibeta.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_FLOAT
    <toolset>intel:<pch>off
    : test_ibeta_float ]
-   [ run test_ibeta.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_DOUBLE
    <toolset>intel:<pch>off
    : test_ibeta_double ]
-   [ run test_ibeta.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_LDOUBLE
    <toolset>intel:<pch>off
    : test_ibeta_long_double ]
-   [ run test_ibeta.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -263,7 +283,7 @@ test-suite special_fun :
    <define>TEST_DATA=1
    <toolset>intel:<pch>off
    : test_ibeta_real_concept1 ]
-   [ run test_ibeta.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -271,7 +291,7 @@ test-suite special_fun :
    <define>TEST_DATA=2
    <toolset>intel:<pch>off
    : test_ibeta_real_concept2 ]
-   [ run test_ibeta.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -279,7 +299,7 @@ test-suite special_fun :
    <define>TEST_DATA=3
    <toolset>intel:<pch>off
    : test_ibeta_real_concept3 ]
-   [ run test_ibeta.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -288,7 +308,7 @@ test-suite special_fun :
    <toolset>intel:<pch>off
    : test_ibeta_real_concept4 ]
-   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -296,7 +316,7 @@ test-suite special_fun :
    <toolset>intel:<pch>off
    <toolset>gcc:<cxxflags>-Wno-overflow
    : test_ibeta_derivative_float ]
-   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -304,7 +324,7 @@ test-suite special_fun :
    <toolset>intel:<pch>off
    <toolset>gcc:<cxxflags>-Wno-overflow
    : test_ibeta_derivative_double ]
-   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -312,7 +332,7 @@ test-suite special_fun :
    <toolset>intel:<pch>off
    <toolset>gcc:<cxxflags>-Wno-overflow
    : test_ibeta_derivative_long_double ]
-   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -321,7 +341,7 @@ test-suite special_fun :
    <toolset>intel:<pch>off
    <toolset>gcc:<cxxflags>-Wno-overflow
    : test_ibeta_derivative_real_concept1 ]
-   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -330,7 +350,7 @@ test-suite special_fun :
    <toolset>intel:<pch>off
    <toolset>gcc:<cxxflags>-Wno-overflow
    : test_ibeta_derivative_real_concept2 ]
-   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -339,7 +359,7 @@ test-suite special_fun :
    <toolset>intel:<pch>off
    <toolset>gcc:<cxxflags>-Wno-overflow
    : test_ibeta_derivative_real_concept3 ]
-   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -349,28 +369,28 @@ test-suite special_fun :
    <toolset>gcc:<cxxflags>-Wno-overflow
    : test_ibeta_derivative_real_concept4 ]
-   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_FLOAT
    <toolset>intel:<pch>off
    : test_ibeta_inv_float ]
-   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_DOUBLE
    <toolset>intel:<pch>off
    : test_ibeta_inv_double ]
-   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_LDOUBLE
    <toolset>intel:<pch>off
    : test_ibeta_inv_long_double ]
-   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -378,7 +398,7 @@ test-suite special_fun :
    <define>TEST_DATA=1
    <toolset>intel:<pch>off
    : test_ibeta_inv_real_concept1 ]
-   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -386,7 +406,7 @@ test-suite special_fun :
    <define>TEST_DATA=2
    <toolset>intel:<pch>off
    : test_ibeta_inv_real_concept2 ]
-   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -394,7 +414,7 @@ test-suite special_fun :
    <define>TEST_DATA=3
    <toolset>intel:<pch>off
    : test_ibeta_inv_real_concept3 ]
-   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -402,28 +422,28 @@ test-suite special_fun :
    <define>TEST_DATA=4
    <toolset>intel:<pch>off
    : test_ibeta_inv_real_concept4 ]
-   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_FLOAT
    <toolset>intel:<pch>off
    : test_ibeta_inv_ab_float ]
-   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_DOUBLE
    <toolset>intel:<pch>off
    : test_ibeta_inv_ab_double ]
-   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_LDOUBLE
    <toolset>intel:<pch>off
    : test_ibeta_inv_ab_long_double ]
-   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -431,7 +451,7 @@ test-suite special_fun :
    <define>TEST_DATA=1
    <toolset>intel:<pch>off
    : test_ibeta_inv_ab_real_concept1 ]
-   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -439,7 +459,7 @@ test-suite special_fun :
    <define>TEST_DATA=2
    <toolset>intel:<pch>off
    : test_ibeta_inv_ab_real_concept2 ]
-   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -447,57 +467,57 @@ test-suite special_fun :
    <define>TEST_DATA=3
    <toolset>intel:<pch>off
    : test_ibeta_inv_ab_real_concept3 ]
-   [ run test_igamma.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_igamma_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_igamma.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_igamma_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_FLOAT
    <toolset>intel:<pch>off
    : test_igamma_inv_float ]
-   [ run test_igamma_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_igamma_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_DOUBLE
    <toolset>intel:<pch>off
    : test_igamma_inv_double ]
-   [ run test_igamma_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_igamma_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_LDOUBLE
    <toolset>intel:<pch>off
    : test_igamma_inv_long_double ]
-   [ run test_igamma_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_igamma_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_REAL_CONCEPT
    <toolset>intel:<pch>off
    : test_igamma_inv_real_concept ]
-   [ run test_igamma_inva.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_igamma_inva.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_FLOAT
    <toolset>intel:<pch>off
    : test_igamma_inva_float ]
-   [ run test_igamma_inva.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_igamma_inva.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_DOUBLE
    <toolset>intel:<pch>off
    : test_igamma_inva_double ]
-   [ run test_igamma_inva.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_igamma_inva.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_LDOUBLE
    <toolset>intel:<pch>off
    : test_igamma_inva_long_double ]
-   [ run test_igamma_inva.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_igamma_inva.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
- [ run test_lambert_w_integrals_float128.cpp ../../test/build//boost_unit_test_framework : : : release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" : no ] [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ]
- [ run test_lambert_w_integrals_quad.cpp ../../test/build//boost_unit_test_framework : : : release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ]
- [ run test_lambert_w_integrals_long_double.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] ]
- [ run test_lambert_w_integrals_double.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] ]
- [ run test_lambert_w_integrals_float.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] ]
- [ run test_lambert_w_derivative.cpp ../../test/build//boost_unit_test_framework : : : BOOST_MATH_TEST_MULTIPRECISION [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] ]
+ [ run test_lambert_w.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_lambert_w.cpp /boost/test//boost_unit_test_framework : : : BOOST_MATH_TEST_MULTIPRECISION=1 $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_lambert_w_multiprecision_1 ]
+ [ run test_lambert_w.cpp /boost/test//boost_unit_test_framework : : : BOOST_MATH_TEST_MULTIPRECISION=2 $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_lambert_w_multiprecision_2 ]
+ [ run test_lambert_w.cpp /boost/test//boost_unit_test_framework : : : BOOST_MATH_TEST_MULTIPRECISION=3 $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_lambert_w_multiprecision_3 ]
+ [ run test_lambert_w.cpp /boost/test//boost_unit_test_framework : : : BOOST_MATH_TEST_MULTIPRECISION=4 BOOST_MATH_TEST_FLOAT128 $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_lambert_w_multiprecision_4 ]
+ [ run test_lambert_w_integrals_float128.cpp /boost/test//boost_unit_test_framework : : : release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" : no ] [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ]
+ [ run test_lambert_w_integrals_quad.cpp /boost/test//boost_unit_test_framework : : : release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ]
+ [ run test_lambert_w_integrals_long_double.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] ]
+ [ run test_lambert_w_integrals_double.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] ]
+ [ run test_lambert_w_integrals_float.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] ]
+ [ run test_lambert_w_derivative.cpp /boost/test//boost_unit_test_framework : : : BOOST_MATH_TEST_MULTIPRECISION $(float128_type) ]

- [ run test_legendre.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] ]
- [ run chebyshev_test.cpp : : : [ requires cxx11_inline_namespaces cxx11_unified_initialization_syntax cxx11_hdr_tuple cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations cxx11_range_based_for cxx11_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] ]
+ [ run test_legendre.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework : : : $(float128_type) ]
+ [ run chebyshev_test.cpp : : : [ requires cxx11_inline_namespaces cxx11_unified_initialization_syntax cxx11_hdr_tuple cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations cxx11_range_based_for cxx11_constexpr ] $(float128_type) ]
 [ run chebyshev_transform_test.cpp ../config//fftw3f : : : TEST1 [ requires cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations cxx11_range_based_for ] [ check-target-builds ../config//has_fftw3 "libfftw3" : : no ] : chebyshev_transform_test_1 ]
 [ run chebyshev_transform_test.cpp ../config//fftw3 : : : TEST2 [ requires cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations cxx11_range_based_for ] [ check-target-builds ../config//has_fftw3 "libfftw3" : : no ] : chebyshev_transform_test_2 ]
 [ run chebyshev_transform_test.cpp ../config//fftw3l : : : TEST3 [ requires cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations cxx11_range_based_for ] [ check-target-builds ../config//has_fftw3 "libfftw3" : : no ] : chebyshev_transform_test_3 ]
@@ -535,52 +555,56 @@ test-suite special_fun :
 [ run cardinal_trigonometric_test.cpp ../config//fftw3q ../config//quadmath : : : TEST4 [ requires cxx11_auto_declarations cxx11_range_based_for ] [ check-target-builds ../config//has_fftw3 "libfftw3" : : no ] [ check-target-builds ../config//has_float128 "__float128" : : no ] : cardinal_trigonometric_test_4 ]
- [ run test_ldouble_simple.cpp ../../test/build//boost_unit_test_framework ]
+ [ run test_ldouble_simple.cpp /boost/test//boost_unit_test_framework ]
 # Needs to run in release mode, as it's rather slow:
- [ run test_next.cpp pch ../../test/build//boost_unit_test_framework : : : release ]
- [ run test_next_decimal.cpp pch ../../test/build//boost_unit_test_framework : : : release ]
- [ run test_owens_t.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_polygamma.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
- [ run test_trigamma.cpp test_instances//test_instances ../../test/build//boost_unit_test_framework ]
- [ run test_round.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run git_issue_430.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_spherical_harmonic.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
- [ run test_sign.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_tgamma_for_issue396_part1.cpp ../../test/build//boost_unit_test_framework : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ]
- [ run test_tgamma_for_issue396_part2.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ]
- [ run test_tgamma_ratio.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
- [ run test_trig.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
- [ run test_zeta.cpp ../../test/build//boost_unit_test_framework test_instances//test_instances pch_light ]
- [ run test_sinc.cpp ../../test/build//boost_unit_test_framework pch_light ]
- [ run test_fibonacci.cpp ../../test/build//boost_unit_test_framework ]
+ [ run test_next.cpp pch /boost/test//boost_unit_test_framework : : : release ]
+ [ run test_next_decimal.cpp pch /boost/test//boost_unit_test_framework : : : release ]
+ [ run test_owens_t.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_polygamma.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+ [ run test_trigamma.cpp test_instances//test_instances /boost/test//boost_unit_test_framework ]
+ [ run test_round.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run git_issue_430.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_spherical_harmonic.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+ [ run test_sign.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_tgamma_for_issue396_part1.cpp /boost/test//boost_unit_test_framework : : : $(float128_type) gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ]
+ [ run test_tgamma_for_issue396_part2.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ]
+ [ run test_tgamma_ratio.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+ [ run test_trig.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+ [ run test_zeta.cpp /boost/test//boost_unit_test_framework test_instances//test_instances pch_light ]
+ [ run test_sinc.cpp /boost/test//boost_unit_test_framework pch_light ]
+ [ run test_fibonacci.cpp /boost/test//boost_unit_test_framework ]
 ;

 test-suite distribution_tests :
- [ run test_arcsine.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_bernoulli.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_beta_dist.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_arcsine.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_landau.cpp pch : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] ]
+ [ run test_saspoint5.cpp pch : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] ]
+ [ run test_holtsmark.cpp pch : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] ]
+ [ run test_mapairy.cpp pch : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] ]
+ [ run test_bernoulli.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_beta_dist.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_FLOAT
   intel:off
 : test_binomial_float
 ]
- [ run test_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_DOUBLE
   intel:off
 : test_binomial_double
 ]
- [ run test_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_LDOUBLE
   intel:off
 : test_binomial_long_double
 ]
- [ run test_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
@@ -588,7 +612,7 @@ test-suite distribution_tests :
   TEST_ROUNDING=0
   intel:off
 : test_binomial_real_concept0
 ]
- [ run test_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
@@ -596,7 +620,7 @@ test-suite distribution_tests :
   TEST_ROUNDING=1
   intel:off
 : test_binomial_real_concept1
 ]
- [ run test_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
@@ -604,7 +628,7 @@ test-suite distribution_tests :
   TEST_ROUNDING=2
   intel:off
 : test_binomial_real_concept2
 ]
- [ run test_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
@@ -612,7 +636,7 @@ test-suite distribution_tests :
   TEST_ROUNDING=3
   intel:off
 : test_binomial_real_concept3
 ]
- [ run test_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
@@ -620,7 +644,7 @@ test-suite distribution_tests :
   TEST_ROUNDING=4
   intel:off
 : test_binomial_real_concept4
 ]
- [ run test_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
@@ -628,7 +652,7 @@ test-suite distribution_tests :
   TEST_ROUNDING=5
   intel:off
 : test_binomial_real_concept5
 ]
- [ run test_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
@@ -636,145 +660,145 @@ test-suite distribution_tests :
   TEST_ROUNDING=6
   intel:off
 : test_binomial_real_concept6
 ]
- [ run test_cauchy.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_chi_squared.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_dist_overloads.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_exponential_dist.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_extreme_value.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_find_location.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_find_scale.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_fisher_f.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_gamma_dist.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_geometric.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_hyperexponential_dist.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_hypergeometric_dist.cpp ../../test/build//boost_unit_test_framework
+ [ run test_cauchy.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_chi_squared.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_dist_overloads.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_exponential_dist.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_extreme_value.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_find_location.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_find_scale.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_fisher_f.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_gamma_dist.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_geometric.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_hyperexponential_dist.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_hypergeometric_dist.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_QUANT=0
   intel:off
 : test_hypergeometric_dist0
 ]
- [ run test_hypergeometric_dist.cpp ../../test/build//boost_unit_test_framework
+ [ run test_hypergeometric_dist.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_QUANT=1
   intel:off
 : test_hypergeometric_dist1
 ]
- [ run test_hypergeometric_dist.cpp ../../test/build//boost_unit_test_framework
+ [ run test_hypergeometric_dist.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_QUANT=2
   intel:off
 : test_hypergeometric_dist2
 ]
- [ run test_hypergeometric_dist.cpp ../../test/build//boost_unit_test_framework
+ [ run test_hypergeometric_dist.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_QUANT=3
   intel:off
 : test_hypergeometric_dist3
 ]
- [ run test_hypergeometric_dist.cpp ../../test/build//boost_unit_test_framework
+ [ run test_hypergeometric_dist.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_QUANT=4
   intel:off
 : test_hypergeometric_dist4
 ]
- [ run test_hypergeometric_dist.cpp ../../test/build//boost_unit_test_framework
+ [ run test_hypergeometric_dist.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_QUANT=5
   intel:off
 : test_hypergeometric_dist5
 ]
- [ run test_inverse_chi_squared_distribution.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_inverse_gamma_distribution.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_inverse_gaussian.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_kolmogorov_smirnov.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] ]
- [ run test_laplace.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_inv_hyp.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_logistic_dist.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_lognormal.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_negative_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_inverse_chi_squared_distribution.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_inverse_gamma_distribution.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_inverse_gaussian.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_kolmogorov_smirnov.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] ]
+ [ run test_laplace.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_inv_hyp.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_logistic_dist.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_lognormal.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_negative_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_FLOAT
   intel:off
 : test_negative_binomial_float
 ]
- [ run test_negative_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_negative_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_DOUBLE
   intel:off
 : test_negative_binomial_double
 ]
- [ run test_negative_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_negative_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_LDOUBLE
   intel:off
 : test_negative_binomial_long_double
 ]
- [ run test_negative_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_negative_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_REAL_CONCEPT
   intel:off
 : test_negative_binomial_real_concept
 ]
- [ run test_nc_chi_squared.cpp pch ../../test/build//boost_unit_test_framework
+ [ run test_nc_chi_squared.cpp pch /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_FLOAT
   intel:off
 : test_nc_chi_squared_float
 ]
- [ run test_nc_chi_squared.cpp pch ../../test/build//boost_unit_test_framework
+ [ run test_nc_chi_squared.cpp pch /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_DOUBLE
   intel:off
 : test_nc_chi_squared_double
 ]
- [ run test_nc_chi_squared.cpp pch ../../test/build//boost_unit_test_framework
+ [ run test_nc_chi_squared.cpp pch /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_LDOUBLE
   intel:off
 : test_nc_chi_squared_long_double
 ]
- [ run test_nc_chi_squared.cpp pch ../../test/build//boost_unit_test_framework
+ [ run test_nc_chi_squared.cpp pch /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_REAL_CONCEPT
   intel:off
 : test_nc_chi_squared_real_concept
 ]
- [ run test_nc_beta.cpp ../../test/build//boost_unit_test_framework
+ [ run test_nc_beta.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_FLOAT
   intel:off
 : test_nc_beta_float
 ]
- [ run test_nc_beta.cpp ../../test/build//boost_unit_test_framework
+ [ run test_nc_beta.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_DOUBLE
   intel:off
 : test_nc_beta_double
 ]
- [ run test_nc_beta.cpp ../../test/build//boost_unit_test_framework
+ [ run test_nc_beta.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_LDOUBLE
   intel:off
 : test_nc_beta_long_double
 ]
- [ run test_nc_beta.cpp ../../test/build//boost_unit_test_framework
+ [ run test_nc_beta.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
@@ -782,7 +806,7 @@ test-suite distribution_tests :
   TEST_DATA=1
   intel:off
 : test_nc_beta_real_concept1
 ]
- [ run test_nc_beta.cpp ../../test/build//boost_unit_test_framework
+ [ run test_nc_beta.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
@@ -790,109 +814,109 @@ test-suite distribution_tests :
   TEST_DATA=2
   intel:off
 : test_nc_beta_real_concept2
 ]
- [ run test_nc_f.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_nc_t.cpp pch ../../test/build//boost_unit_test_framework
+ [ run test_nc_f.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_nc_t.cpp pch /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_FLOAT
   intel:off
 : test_nc_t_float
 ]
- [ run test_nc_t.cpp pch ../../test/build//boost_unit_test_framework
+ [ run test_nc_t.cpp pch /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_DOUBLE
   intel:off
 : test_nc_t_double
 ]
- [ run test_nc_t.cpp pch ../../test/build//boost_unit_test_framework
+ [ run test_nc_t.cpp pch /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_LDOUBLE
   intel:off
 : test_nc_t_long_double
 ]
- [ run test_nc_t.cpp pch ../../test/build//boost_unit_test_framework
+ [ run test_nc_t.cpp pch /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_REAL_CONCEPT
   intel:off
 : test_nc_t_real_concept
 ]
- [ run test_normal.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_pareto.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_poisson.cpp ../../test/build//boost_unit_test_framework
+ [ run test_normal.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_pareto.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_poisson.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_FLOAT
   intel:off
 : test_poisson_float
 ]
- [ run test_poisson.cpp ../../test/build//boost_unit_test_framework
+ [ run test_poisson.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_DOUBLE
   intel:off
 : test_poisson_double
 ]
- [ run test_poisson.cpp ../../test/build//boost_unit_test_framework
+ [ run test_poisson.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_LDOUBLE
   intel:off
 : test_poisson_long_double
 ]
- [ run test_poisson.cpp ../../test/build//boost_unit_test_framework
+ [ run test_poisson.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_REAL_CONCEPT
   intel:off
 : test_poisson_real_concept
 ]
- [ run test_rayleigh.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_students_t.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_skew_normal.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_triangular.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_uniform.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_weibull.cpp ../../test/build//boost_unit_test_framework ]
+ [ run test_rayleigh.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_students_t.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_skew_normal.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_triangular.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_uniform.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_weibull.cpp /boost/test//boost_unit_test_framework ]

- [ run test_legacy_nonfinite.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_basic_nonfinite.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_lexical_cast.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_nonfinite_trap.cpp ../../test/build//boost_unit_test_framework : : : off:no ]
- [ run test_signed_zero.cpp ../../test/build//boost_unit_test_framework ]
- [ run complex_test.cpp ../../test/build//boost_unit_test_framework ]
+ [ run test_legacy_nonfinite.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_basic_nonfinite.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_lexical_cast.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_nonfinite_trap.cpp /boost/test//boost_unit_test_framework : : : off:no ]
+ [ run test_signed_zero.cpp /boost/test//boost_unit_test_framework ]
+ [ run complex_test.cpp /boost/test//boost_unit_test_framework ]
 [ compile test_dist_deduction_guides.cpp : [ requires cpp_deduction_guides cpp_variadic_templates ] ]
- [ run git_issue_800.cpp ../../test/build//boost_unit_test_framework ]
- [ run git_issue_845.cpp ../../test/build//boost_unit_test_framework ]
- [ run scipy_issue_14901.cpp ../../test/build//boost_unit_test_framework ]
- [ run scipy_issue_14901_ncf.cpp ../../test/build//boost_unit_test_framework ]
- [ run scipy_issue_15101.cpp ../../test/build//boost_unit_test_framework ]
- [ run scipy_issue_17146.cpp ../../test/build//boost_unit_test_framework ]
- [ run scipy_issue_17388.cpp ../../test/build//boost_unit_test_framework ]
- [ run scipy_issue_17916.cpp ../../test/build//boost_unit_test_framework ]
- [ run scipy_issue_17916_nct.cpp ../../test/build//boost_unit_test_framework ]
- [ run scipy_issue_18302.cpp ../../test/build//boost_unit_test_framework ]
- [ run scipy_issue_18511.cpp ../../test/build//boost_unit_test_framework ]
+ [ run git_issue_800.cpp /boost/test//boost_unit_test_framework ]
+ [ run git_issue_845.cpp /boost/test//boost_unit_test_framework ]
+ [ run scipy_issue_14901.cpp /boost/test//boost_unit_test_framework ]
+ [ run scipy_issue_14901_ncf.cpp /boost/test//boost_unit_test_framework ]
+ [ run scipy_issue_15101.cpp /boost/test//boost_unit_test_framework ]
+ [ run scipy_issue_17146.cpp /boost/test//boost_unit_test_framework ]
+ [ run scipy_issue_17388.cpp /boost/test//boost_unit_test_framework ]
+ [ run scipy_issue_17916.cpp /boost/test//boost_unit_test_framework ]
+ [ run scipy_issue_17916_nct.cpp /boost/test//boost_unit_test_framework ]
+ [ run scipy_issue_18302.cpp /boost/test//boost_unit_test_framework ]
+ [ run scipy_issue_18511.cpp /boost/test//boost_unit_test_framework ]
 [ compile scipy_issue_19762.cpp ]
 [ run git_issue_1120.cpp ]
 ;

-test-suite new_floats :
+test-suite new_floats :
 [ compile compile_test/float32.cpp ]
 [ compile compile_test/float64.cpp ]
 [ compile compile_test/float128.cpp ]
- [ run test_float_io.cpp : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] ]
- [ run test_float_io.cpp : : : BOOST_MATH_TEST_IO_AS_INTEL_QUAD=1 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] : test_float_io_quad ]
+ [ run test_float_io.cpp : : : $(float128_type) ]
+ [ run test_float_io.cpp : : : BOOST_MATH_TEST_IO_AS_INTEL_QUAD=1 $(float128_type) : test_float_io_quad ]
 ;

 test-suite mp :
- [ run test_nc_t_quad.cpp pch ../../test/build//boost_unit_test_framework : : : release gcc-mingw:-Wa,-mbig-obj off [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run test_polynomial.cpp ../../test/build//boost_unit_test_framework : : : TEST1 : test_polynomial_1 ]
- [ run test_polynomial.cpp ../../test/build//boost_unit_test_framework : : : TEST2 : test_polynomial_2 ]
- [ run test_polynomial.cpp ../../test/build//boost_unit_test_framework : : : TEST3 : test_polynomial_3 ]
+ [ run test_nc_t_quad.cpp pch /boost/test//boost_unit_test_framework : : : release gcc-mingw:-Wa,-mbig-obj off [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
+ [ run test_polynomial.cpp /boost/test//boost_unit_test_framework : : : TEST1 : test_polynomial_1 ]
+ [ run test_polynomial.cpp /boost/test//boost_unit_test_framework : : : TEST2 : test_polynomial_2 ]
+ [ run test_polynomial.cpp /boost/test//boost_unit_test_framework : : : TEST3 : test_polynomial_3 ]
 [ run test_estrin.cpp ]
 [ run polynomial_concept_check.cpp ]
@@ -908,13 +932,13 @@ test-suite misc :
 ../build//boost_math_tr1f
 ../build//boost_math_c99
 ../build//boost_math_c99f
- ../../test/build//boost_unit_test_framework
+ /boost/test//boost_unit_test_framework
 ]
 [ run test_tr1.cpp
 ../build//boost_math_tr1l
 ../build//boost_math_c99l
- ../../test/build//boost_unit_test_framework
+ /boost/test//boost_unit_test_framework
 : : : TEST_LD=1
 [ check-target-builds ../config//has_long_double_support "long double support" : : no ]
@@ -927,7 +951,7 @@ test-suite misc :
 ../build//boost_math_tr1f
 ../build//boost_math_c99
 ../build//boost_math_c99f
- ../../test/build//boost_unit_test_framework
+ /boost/test//boost_unit_test_framework
 : : : #requirements
 : test_tr1_c
 ]
@@ -936,23 +960,23 @@ test-suite misc :
 [ run test_tr1.c
 ../build//boost_math_tr1l
 ../build//boost_math_c99l
- ../../test/build//boost_unit_test_framework
+ /boost/test//boost_unit_test_framework
 : : : TEST_LD=1
 [ check-target-builds ../config//has_long_double_support "long double support" : : no ]
 : test_tr1_c_long_double
 ]
- [ run test_constants.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run simple_continued_fraction_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run centered_continued_fraction_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run luroth_expansion_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run engel_expansion_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run test_classify.cpp pch ../../test/build//boost_unit_test_framework : : : msvc:/bigobj ]
- [ run test_error_handling.cpp ../../test/build//boost_unit_test_framework ]
- [ run legendre_stieltjes_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_range_based_for ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run test_minima.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_rationals.cpp ../../test/build//boost_unit_test_framework
+ [ run test_constants.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ]
+ [ run simple_continued_fraction_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ]
+ [ run centered_continued_fraction_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ]
+ [ run luroth_expansion_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ]
+ [ run engel_expansion_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ]
+ [ run test_classify.cpp pch /boost/test//boost_unit_test_framework : : : msvc:/bigobj ]
+ [ run test_error_handling.cpp /boost/test//boost_unit_test_framework ]
+ [ run legendre_stieltjes_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_range_based_for ] $(float128_type) ]
+ [ run test_minima.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_rationals.cpp /boost/test//boost_unit_test_framework
 test_rational_instances/test_rational_double1.cpp
 test_rational_instances/test_rational_double2.cpp
 test_rational_instances/test_rational_double3.cpp
@@ -973,96 +997,96 @@ test-suite misc :
 test_rational_instances/test_rational_real_concept4.cpp
 test_rational_instances/test_rational_real_concept5.cpp
 ]
- [ run test_policy.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_policy_2.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_policy_3.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_policy_4.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_policy_5.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_policy_6.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_policy_7.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_policy_8.cpp ../../test/build//boost_unit_test_framework ]
+ [ run test_policy.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_policy_2.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_policy_3.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_policy_4.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_policy_5.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_policy_6.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_policy_7.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_policy_8.cpp /boost/test//boost_unit_test_framework ]
 [ compile test_policy_9.cpp ]
- [ run test_policy_10.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_policy_sf.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_long_double_support.cpp ../../test/build//boost_unit_test_framework
+ [ run test_policy_10.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_policy_sf.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_long_double_support.cpp /boost/test//boost_unit_test_framework
 : : : [ check-target-builds ../config//has_long_double_support "long double support" : : no ]
 ]
 [ run test_recurrence.cpp : : : TEST=1 [ requires cxx11_unified_initialization_syntax cxx11_hdr_tuple cxx11_auto_declarations cxx11_decltype ] msvc:/bigobj : test_recurrence_1 ]
- [ run test_recurrence.cpp : : : TEST=2 release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] [ requires cxx11_unified_initialization_syntax cxx11_hdr_tuple cxx11_auto_declarations cxx11_decltype ] : test_recurrence_2 ]
- [ run test_recurrence.cpp : : : TEST=3 release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] [ requires cxx11_unified_initialization_syntax cxx11_hdr_tuple cxx11_auto_declarations cxx11_decltype ] : test_recurrence_3 ]
+ [ run test_recurrence.cpp : : : TEST=2 release $(float128_type) [ requires cxx11_unified_initialization_syntax cxx11_hdr_tuple cxx11_auto_declarations cxx11_decltype ] : test_recurrence_2 ]
+ [ run test_recurrence.cpp : : : TEST=3 release $(float128_type) [ requires cxx11_unified_initialization_syntax cxx11_hdr_tuple cxx11_auto_declarations cxx11_decltype ] : test_recurrence_3 ]
 [ run test_print_info_on_type.cpp ]
- [ run univariate_statistics_test.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx17_if_constexpr cxx17_std_apply ] ]
- [ run univariate_statistics_backwards_compatible_test.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ] ]
- [ run ooura_fourier_integral_test.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] [ requires cxx17_if_constexpr cxx17_std_apply ] ]
+ [ run univariate_statistics_test.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx17_if_constexpr cxx17_std_apply ] ]
+ [ run univariate_statistics_backwards_compatible_test.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ] ]
+ [ run ooura_fourier_integral_test.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] $(float128_type) [ requires cxx17_if_constexpr cxx17_std_apply ] ]
 [ run empirical_cumulative_distribution_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
- [ run norms_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
+ [ run norms_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
 [ run signal_statistics_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
 [ run anderson_darling_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
 [ run ljung_box_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
 [ run cubic_roots_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
 [ run quartic_roots_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
- [ run test_t_test.cpp : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ] ]
- [ run test_z_test.cpp : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ] ]
+ [ run test_t_test.cpp : : : $(float128_type) [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ] ]
+ [ run test_z_test.cpp : : : $(float128_type) [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ] ]
 [ run bivariate_statistics_test.cpp : : : [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ]
 [ run linear_regression_test.cpp : : : [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ] ]
 [ run test_runs_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
- [ run test_chatterjee_correlation.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_rank.cpp ../../test/build//boost_unit_test_framework ]
- [ run lanczos_smoothing_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
- [ run condition_number_test.cpp ../../test/build//boost_unit_test_framework : : : TEST=1 msvc:/bigobj [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] : condition_number_test_1 ]
- [ run condition_number_test.cpp ../../test/build//boost_unit_test_framework : : : TEST=2 msvc:/bigobj [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] : condition_number_test_2 ]
- [ run condition_number_test.cpp ../../test/build//boost_unit_test_framework : : : TEST=3 msvc:/bigobj [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] : condition_number_test_3 ]
- [ run test_real_concept.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_remez.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_roots.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_root_iterations.cpp pch ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_tuple ] ]
- [ run test_root_finding_concepts.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_toms748_solve.cpp pch ../../test/build//boost_unit_test_framework ]
+ [ run test_chatterjee_correlation.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_rank.cpp /boost/test//boost_unit_test_framework ]
+ [ run lanczos_smoothing_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
+ [ run condition_number_test.cpp /boost/test//boost_unit_test_framework : : : TEST=1 msvc:/bigobj $(float128_type) : condition_number_test_1 ]
+ [ run condition_number_test.cpp /boost/test//boost_unit_test_framework : : : TEST=2 msvc:/bigobj $(float128_type) : condition_number_test_2 ]
+ [ run condition_number_test.cpp /boost/test//boost_unit_test_framework : : : TEST=3 msvc:/bigobj $(float128_type) : condition_number_test_3 ]
+ [ run test_real_concept.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_remez.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_roots.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_root_iterations.cpp pch /boost/test//boost_unit_test_framework : : : [ requires cxx11_hdr_tuple ] ]
+ [ run test_root_finding_concepts.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_toms748_solve.cpp pch /boost/test//boost_unit_test_framework ]
 [ run compile_test/interpolators_cubic_spline_incl_test.cpp compile_test_main : : : [ requires cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ]
 [ run compile_test/interpolators_barycentric_rational_incl_test.cpp compile_test_main : : : [ requires cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ]
 [ run octonion_test.cpp
- ../../test/build//boost_unit_test_framework ]
+ /boost/test//boost_unit_test_framework ]
 [ run octonion_test_simple.cpp ]
 [ run quaternion_constexpr_test.cpp ]
 [ run quaternion_test.cpp
- ../../test/build//boost_unit_test_framework : : : msvc-14.0:off [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
+ /boost/test//boost_unit_test_framework : : : msvc-14.0:off $(float128_type) ]
 [ run quaternion_mult_incl_test.cpp quaternion_mi1.cpp quaternion_mi2.cpp
- ../../test/build//boost_unit_test_framework ]
+ /boost/test//boost_unit_test_framework ]
 # [ run __temporary_test.cpp test_instances//test_instances : : : always_show_run_output off ]
 ;

 test-suite interpolators :
- [ run test_barycentric_rational.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations cxx11_unified_initialization_syntax ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run test_vector_barycentric_rational.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations cxx11_unified_initialization_syntax ] [ check-target-builds ../../multiprecision/config//has_eigen : : no ] ]
- [ run cardinal_cubic_b_spline_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_smart_ptr cxx11_defaulted_functions ] off msvc:/bigobj release ]
- [ run cardinal_b_spline_test.cpp : : : [ requires cxx11_auto_declarations cxx11_constexpr cxx11_smart_ptr cxx11_defaulted_functions ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run jacobi_test.cpp : : : [ requires cxx11_auto_declarations cxx11_constexpr cxx11_smart_ptr cxx11_defaulted_functions ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run gegenbauer_test.cpp : : : [ requires cxx11_auto_declarations cxx11_constexpr cxx11_smart_ptr cxx11_defaulted_functions ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run daubechies_scaling_test.cpp : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ]
- [ run daubechies_wavelet_test.cpp : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ]
- [ run fourier_transform_daubechies_test.cpp : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ]
- [ run wavelet_transform_test.cpp : : : msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ]
- [ run agm_test.cpp : : : msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run rsqrt_test.cpp : : : msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run cohen_acceleration_test.cpp : : : msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
+ [ run test_barycentric_rational.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations cxx11_unified_initialization_syntax ] $(float128_type) ]
+ [ run test_vector_barycentric_rational.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations cxx11_unified_initialization_syntax ] [ check-target-builds ../../multiprecision/config//has_eigen : : no ] ]
+ [ run cardinal_cubic_b_spline_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_smart_ptr cxx11_defaulted_functions ] off msvc:/bigobj release ]
+ [ run cardinal_b_spline_test.cpp : : : [ requires cxx11_auto_declarations cxx11_constexpr cxx11_smart_ptr cxx11_defaulted_functions ] $(float128_type) ]
+ [ run jacobi_test.cpp : : : [ requires cxx11_auto_declarations cxx11_constexpr cxx11_smart_ptr cxx11_defaulted_functions ] $(float128_type) ]
+ [ run gegenbauer_test.cpp : : : [ requires cxx11_auto_declarations cxx11_constexpr cxx11_smart_ptr cxx11_defaulted_functions ] $(float128_type) ]
+ [ run daubechies_scaling_test.cpp /boost/hana//boost_hana : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ]
"Cygwin CI run" : no ] ] + [ run daubechies_wavelet_test.cpp /boost/hana//boost_hana : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ] + [ run fourier_transform_daubechies_test.cpp : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ] + [ run wavelet_transform_test.cpp /boost/hana//boost_hana : : : msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] + [ run agm_test.cpp : : : msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ] + [ run rsqrt_test.cpp : : : msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ] + [ run cohen_acceleration_test.cpp : : : msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ] [ compile compile_test/filters_daubechies_incl_test.cpp : [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ compile compile_test/sf_daubechies_scaling_incl_test.cpp : [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ run whittaker_shannon_test.cpp : : : [ requires cxx11_auto_declarations cxx11_constexpr cxx11_smart_ptr cxx11_defaulted_functions ] ] [ run cardinal_quadratic_b_spline_test.cpp : : : [ requires cxx11_auto_declarations cxx11_constexpr cxx11_smart_ptr cxx11_defaulted_functions ] ] - [ run cardinal_quintic_b_spline_test.cpp : : : [ requires cxx11_auto_declarations cxx11_constexpr cxx11_smart_ptr cxx11_defaulted_functions ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ] - [ run makima_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ] - [ run pchip_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ] - [ run septic_hermite_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ] - [ run quintic_hermite_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ] - [ run cubic_hermite_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ] + [ run cardinal_quintic_b_spline_test.cpp : : : [ requires cxx11_auto_declarations cxx11_constexpr cxx11_smart_ptr cxx11_defaulted_functions ] $(float128_type) ] + [ run makima_test.cpp /boost/circular_buffer//boost_circular_buffer : : : [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ] + [ run pchip_test.cpp /boost/circular_buffer//boost_circular_buffer : : : [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ] + [ run septic_hermite_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ] + [ run quintic_hermite_test.cpp /boost/circular_buffer//boost_circular_buffer : : : [ requires 
+ [ run cubic_hermite_test.cpp /boost/circular_buffer//boost_circular_buffer : : : [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ]
 [ run bilinear_uniform_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
- [ run bezier_polynomial_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run catmull_rom_test.cpp ../../test/build//boost_unit_test_framework : : : TEST=1 [ requires cxx11_hdr_array cxx11_hdr_initializer_list ] : catmull_rom_test_1 ]
- [ run catmull_rom_test.cpp ../../test/build//boost_unit_test_framework : : : TEST=2 [ requires cxx11_hdr_array cxx11_hdr_initializer_list ] : catmull_rom_test_2 ]
- [ run catmull_rom_test.cpp ../../test/build//boost_unit_test_framework : : : TEST=3 [ requires cxx11_hdr_array cxx11_hdr_initializer_list ] : catmull_rom_test_3 ]
+ [ run bezier_polynomial_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ]
+ [ run catmull_rom_test.cpp /boost/test//boost_unit_test_framework : : : TEST=1 [ requires cxx11_hdr_array cxx11_hdr_initializer_list ] : catmull_rom_test_1 ]
+ [ run catmull_rom_test.cpp /boost/test//boost_unit_test_framework : : : TEST=2 [ requires cxx11_hdr_array cxx11_hdr_initializer_list ] : catmull_rom_test_2 ]
+ [ run catmull_rom_test.cpp /boost/test//boost_unit_test_framework : : : TEST=3 [ requires cxx11_hdr_array cxx11_hdr_initializer_list ] : catmull_rom_test_3 ]
 [ run compile_test/interpolators_catmull_rom_incl_test.cpp compile_test_main : : : [ requires cxx11_hdr_array cxx11_hdr_initializer_list ] ]
 [ run compile_test/interpolators_catmull_rom_concept_test.cpp compile_test_main : : : [ requires cxx11_hdr_array cxx11_hdr_initializer_list ] ]
 [ run test_standalone_asserts.cpp ]
@@ -1076,118 +1100,118 @@ test-suite interpolators :
 ;

 test-suite quadrature :
- [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework
- : : : msvc:/bigobj TEST1 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ]
+ [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework
+ : : : msvc:/bigobj TEST1 $(float128_type)
 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ]
 : tanh_sinh_quadrature_test_1 ]
- [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework
- : : : msvc:/bigobj TEST1A [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ]
+ [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework
+ : : : msvc:/bigobj TEST1A $(float128_type)
 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ]
 : tanh_sinh_quadrature_test_1a ]
- [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework
- : : : release msvc:/bigobj TEST1B [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ]
+ [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework
+ : : : release msvc:/bigobj TEST1B $(float128_type)
 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ]
 : tanh_sinh_quadrature_test_1b ]
- [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework
- : : : msvc:/bigobj TEST2 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ]
+ [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework
+ : : : msvc:/bigobj TEST2 $(float128_type)
 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ]
 : tanh_sinh_quadrature_test_2 ]
- [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework
- : : : release msvc:/bigobj TEST2A [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ]
+ [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework
+ : : : release msvc:/bigobj TEST2A $(float128_type)
 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ]
 : tanh_sinh_quadrature_test_2a ]
- [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework
- : : : msvc:/bigobj TEST3 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ]
+ [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework
+ : : : msvc:/bigobj TEST3 $(float128_type)
 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ]
 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ]
 : tanh_sinh_quadrature_test_3 ]
- [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework
- : : : release msvc:/bigobj TEST3A [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ]
+ [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework
+ : : : release msvc:/bigobj TEST3A $(float128_type)
 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ]
 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ]
 : tanh_sinh_quadrature_test_3a ]
- [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework
- : : : release msvc:/bigobj TEST4 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ]
+ [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework
+ : : : release msvc:/bigobj TEST4 $(float128_type)
 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ]
 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ]
 : tanh_sinh_quadrature_test_4 ]
- [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework
- : : : release msvc:/bigobj TEST5 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ]
+ [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework
+ : : : release msvc:/bigobj TEST5 $(float128_type)
 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ]
 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ]
 : tanh_sinh_quadrature_test_5 ]
- [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework
- : : : msvc:/bigobj TEST6 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ]
+ [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework
+ : : : msvc:/bigobj TEST6 $(float128_type)
 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ]
cxx11_unified_initialization_syntax sfinae_expr ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : tanh_sinh_quadrature_test_6 ] - [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release msvc:/bigobj TEST6A [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release msvc:/bigobj TEST6A $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : tanh_sinh_quadrature_test_6a ] - [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release msvc:/bigobj TEST7 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release msvc:/bigobj TEST7 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : tanh_sinh_quadrature_test_7 ] - [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release msvc:/bigobj TEST8 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release msvc:/bigobj TEST8 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : tanh_sinh_quadrature_test_8 ] - [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework + [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework : : : release msvc:/bigobj TEST9 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : tanh_sinh_quadrature_test_9 ] [ run tanh_sinh_mpfr.cpp ../tools//mpfr ../tools//gmp : : : [ check-target-builds ../config//has_mpfr : : no ] [ check-target-builds ../config//has_gmp : : no ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] release clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] - [ run sinh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] ] - [ run exp_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : TEST1 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] : exp_sinh_quadrature_test_1 ] + [ run sinh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release $(float128_type) [ requires 
cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] ] + [ run exp_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : TEST1 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] : exp_sinh_quadrature_test_1 ] - [ run exp_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release TEST2 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run exp_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release TEST2 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] : exp_sinh_quadrature_test_2 ] - [ run exp_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : TEST3 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run exp_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : TEST3 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : exp_sinh_quadrature_test_3 ] - [ run exp_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release TEST4 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run exp_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release TEST4 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : exp_sinh_quadrature_test_4 ] - [ run exp_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release TEST5 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run exp_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release TEST5 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : exp_sinh_quadrature_test_5 ] - [ run exp_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release TEST6 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run exp_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release TEST6 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : exp_sinh_quadrature_test_6 ] - [ run exp_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release TEST7 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run exp_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release TEST7 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : exp_sinh_quadrature_test_7 ] - [ run exp_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release TEST8 [ 
check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run exp_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release TEST8 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : exp_sinh_quadrature_test_8 ] - [ run exp_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release TEST9 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run exp_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release TEST9 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : exp_sinh_quadrature_test_9 ] - [ run exp_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release TEST10 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run exp_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release TEST10 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : exp_sinh_quadrature_test_10 ] - [ run gauss_quadrature_test.cpp : : : TEST1 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run gauss_quadrature_test.cpp : : : TEST1 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release : gauss_quadrature_test_1 ] - [ run gauss_quadrature_test.cpp : : : TEST2 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run gauss_quadrature_test.cpp : : : TEST2 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : gauss_quadrature_test_2 ] - [ run gauss_quadrature_test.cpp : : : TEST3 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run gauss_quadrature_test.cpp : : : TEST3 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : gauss_quadrature_test_3 ] - [ run gauss_kronrod_quadrature_test.cpp : : : TEST1 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run gauss_kronrod_quadrature_test.cpp : : : TEST1 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release : gauss_kronrod_quadrature_test_1 ] - [ run gauss_kronrod_quadrature_test.cpp : : : TEST1A [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run gauss_kronrod_quadrature_test.cpp : : : TEST1A $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release [ check-target-builds 
../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : gauss_kronrod_quadrature_test_1a ] - [ run gauss_kronrod_quadrature_test.cpp : : : TEST2 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run gauss_kronrod_quadrature_test.cpp : : : TEST2 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : gauss_kronrod_quadrature_test_2 ] - [ run gauss_kronrod_quadrature_test.cpp : : : TEST3 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run gauss_kronrod_quadrature_test.cpp : : : TEST3 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : gauss_kronrod_quadrature_test_3 ] - [ run adaptive_gauss_kronrod_quadrature_test.cpp : : : TEST1 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run adaptive_gauss_kronrod_quadrature_test.cpp : : : TEST1 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release : adaptive_gauss_quadrature_test_1 ] - [ run adaptive_gauss_kronrod_quadrature_test.cpp : : : TEST1A [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run adaptive_gauss_kronrod_quadrature_test.cpp : : : TEST1A $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : adaptive_gauss_quadrature_test_1a ] - [ run adaptive_gauss_kronrod_quadrature_test.cpp : : : TEST2 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run adaptive_gauss_kronrod_quadrature_test.cpp : : : TEST2 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : adaptive_gauss_quadrature_test_2 ] - [ run adaptive_gauss_kronrod_quadrature_test.cpp : : : TEST3 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run adaptive_gauss_kronrod_quadrature_test.cpp : : : TEST3 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : adaptive_gauss_quadrature_test_3 ] [ run naive_monte_carlo_test.cpp : : : @@ -1290,28 +1314,28 @@ test-suite quadrature : [ compile compile_test/gauss_concept_test.cpp : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ compile compile_test/gauss_kronrod_concept_test.cpp : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run git_issue_898.cpp ] - [ run git_issue_1075.cpp : : : [ check-target-builds 
../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] ] + [ run git_issue_1075.cpp : : : $(float128_type) ] - [ run test_trapezoidal.cpp ../../test/build//boost_unit_test_framework : : : + [ run test_trapezoidal.cpp /boost/test//boost_unit_test_framework : : : release [ requires cxx11_lambdas cxx11_auto_declarations cxx11_decltype cxx11_unified_initialization_syntax cxx11_variadic_templates ] - [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] ] + $(float128_type) ] ; test-suite autodiff : - [ run test_numerical_differentiation.cpp ../../test/build//boost_unit_test_framework : : : msvc:/bigobj [ requires cxx11_auto_declarations cxx11_constexpr ] ] + [ run test_numerical_differentiation.cpp /boost/test//boost_unit_test_framework : : : msvc:/bigobj [ requires cxx11_auto_declarations cxx11_constexpr ] ] [ run compile_test/diff_numerical_differentiation_incl_test.cpp compile_test_main : : : [ requires cxx11_auto_declarations cxx11_constexpr ] ] [ compile compile_test/diff_numerical_differentiation_concept_test.cpp : [ requires cxx11_auto_declarations cxx11_constexpr ] ] - [ run test_autodiff_1.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] - [ run test_autodiff_2.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] - [ run test_autodiff_3.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] - [ run test_autodiff_4.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] - [ run test_autodiff_5.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] - [ run test_autodiff_6.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] - [ run test_autodiff_7.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires 
cxx11_inline_namespaces ] ] - [ run test_autodiff_8.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ check-target-builds ../config//is_ci_standalone_run "Standalone CI run" : no ] [ requires cxx11_inline_namespaces ] ] - [ compile compile_test/diff_autodiff_incl_test.cpp : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] - [ compile compile_test/diff_finite_difference_incl_test.cpp : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] - [ compile compile_test/diff_lanczos_smoothing_incl_test.cpp : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ run test_autodiff_1.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ run test_autodiff_2.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ run test_autodiff_3.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ run test_autodiff_4.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ run test_autodiff_5.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ run test_autodiff_6.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ run test_autodiff_7.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ run test_autodiff_8.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ check-target-builds ../config//is_ci_standalone_run "Standalone CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ compile 
compile_test/diff_autodiff_incl_test.cpp : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ compile compile_test/diff_finite_difference_incl_test.cpp : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ compile compile_test/diff_lanczos_smoothing_incl_test.cpp : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] ; # @@ -1320,18 +1344,18 @@ test-suite autodiff : # too much time: # test-suite long-running-tests : - [ run test_0F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=3 release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] : test_0F1_3 ] - [ run test_0F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=4 release : test_0F1_4 ] - [ run test_1F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=5 clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_1F1_real_concept ] - [ run test_1F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=6 release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] clang:-Wno-literal-range : test_1F1_quad ] - [ run test_1F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=7 release clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_1F1_dec_40 ] - [ run test_1F1_regularized.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=6 release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] clang:-Wno-literal-range : test_1F1_regularized_quad ] - [ run test_1F1_regularized.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=7 release clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_1F1_regularized_dec_40 ] - [ run test_1F1_log.cpp ../../test/build//boost_unit_test_framework : : : release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=6 release [ check-target-builds ../config//has_float128 
"GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_1F1_log_quad ] - [ run test_1F1_log.cpp ../../test/build//boost_unit_test_framework : : : release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=7 release clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_1F1_log_dec_40 ] - [ run test_pFq.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=6 release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_pFq_quad ] - [ run test_pFq.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=7 release clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_pFq_dec_40 ] - [ run test_pFq_precision.cpp ../tools//mpfr ../tools//gmp ../../test/build//boost_unit_test_framework /boost/system//boost_system /boost/chrono//boost_chrono : : : [ check-target-builds ../config//has_mpfr : : no ] [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] release clang:-Wno-literal-range ] + [ run test_0F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=3 release $(float128_type) : test_0F1_3 ] + [ run test_0F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=4 release : test_0F1_4 ] + [ run test_1F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=5 clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_1F1_real_concept ] + [ run test_1F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=6 release $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] clang:-Wno-literal-range : test_1F1_quad ] + [ run test_1F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=7 release clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_1F1_dec_40 ] + [ run test_1F1_regularized.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=6 release $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] clang:-Wno-literal-range : test_1F1_regularized_quad ] + [ run test_1F1_regularized.cpp /boost/test//boost_unit_test_framework : : : [ requires 
cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=7 release clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_1F1_regularized_dec_40 ] + [ run test_1F1_log.cpp /boost/test//boost_unit_test_framework : : : release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=6 release $(float128_type) clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_1F1_log_quad ] + [ run test_1F1_log.cpp /boost/test//boost_unit_test_framework : : : release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=7 release clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_1F1_log_dec_40 ] + [ run test_pFq.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=6 release $(float128_type) clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_pFq_quad ] + [ run test_pFq.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=7 release clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_pFq_dec_40 ] + [ run test_pFq_precision.cpp ../tools//mpfr ../tools//gmp /boost/test//boost_unit_test_framework /boost/system//boost_system /boost/chrono//boost_chrono : : : [ check-target-builds ../config//has_mpfr : : no ] [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] release clang:-Wno-literal-range ] [ run test_constant_generate.cpp : : : release USE_CPP_FLOAT=1 off:no ] ; @@ -1349,9 +1373,7 @@ rule get_float128_tests : # command line : # input files : # requirements - [ check-target-builds ../config//has_intel_quad "Intel _Quad datatype support" : -Qoption,cpp,--extended_float_type BOOST_MATH_USE_FLOAT128 ] - [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] - [ check-target-builds ../config//has_128bit_floatmax_t "128-bit floatmax_t" : : no ] + $(float128_type) BOOST_ALL_NO_LIB : $(source:B)_floatmax_t ] ; } @@ -1380,6 +1402,10 @@ test-suite concepts : [ run compile_test/dist_inv_chi_sq_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/dist_hyperexponential_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/dist_hypergeo_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] + [ run compile_test/dist_landau_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] + [ run compile_test/dist_mapairy_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] + [ run compile_test/dist_holtsmark_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] + [ run 
compile_test/dist_saspoint5_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/dist_laplace_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/dist_logistic_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/dist_lognormal_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] @@ -1500,7 +1526,7 @@ test-suite concepts : [ compile compile_test/std_real_concept_check.cpp : EMULATE128 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : std_real_concept_check_128 ] [ run compile_test/cstdfloat_concept_check_1.cpp : : : [ check-target-builds ../config//has_intel_quad "Intel _Quad datatype support" : -Qoption,cpp,--extended_float_type ] - [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] + $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/cstdfloat_concept_check_2.cpp : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/cstdfloat_concept_check_3.cpp : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/cstdfloat_concept_check_4.cpp : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] @@ -1509,7 +1535,7 @@ test-suite concepts : [ compile compile_test/cstdfloat_iostream_incl_test.cpp : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ compile compile_test/cstdfloat_limits_incl_test.cpp : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ compile compile_test/cstdfloat_types_incl_test.cpp : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] - [ run test_cstdfloat.cpp ../../test/build//boost_unit_test_framework : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] + [ run test_cstdfloat.cpp /boost/test//boost_unit_test_framework : : : $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/sf_airy_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/sf_hankel_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/sf_jacobi_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] diff --git a/test/beta_med_data.ipp b/test/beta_med_data.ipp index b1f35d98e1..eb3e884b8c 100644 --- a/test/beta_med_data.ipp +++ b/test/beta_med_data.ipp @@ -3,6 +3,10 @@ // Boost Software License, Version 1.0. 
(See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

+#ifdef __CUDACC__
+#pragma nv_diag_suppress 221
+#endif
+
 static const std::array<std::array<typename table_type<T>::type, 3>, 1830> beta_med_data = { {
    {{ SC_(0.4883005917072296142578125), SC_(0.4883005917072296142578125), SC_(3.245912809500479157065104747353807392371) }},
    {{ SC_(3.5808107852935791015625), SC_(0.4883005917072296142578125), SC_(1.007653173802923954909901438393379243537) }},
diff --git a/test/ccmath_abs_test.cpp b/test/ccmath_abs_test.cpp
index 467b7a5a15..6f09b3e0b3 100644
--- a/test/ccmath_abs_test.cpp
+++ b/test/ccmath_abs_test.cpp
@@ -76,9 +76,7 @@ int main()
     // Types that are convertible to int
     test();
-#if CHAR_MIN != 0
-    test();
-#endif
+    test();
 
     // fabs
     fabs_test();
diff --git a/test/ccmath_isinf_test.cpp b/test/ccmath_isinf_test.cpp
index 1f6b61d2fa..3ee5d1375a 100644
--- a/test/ccmath_isinf_test.cpp
+++ b/test/ccmath_isinf_test.cpp
@@ -5,6 +5,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/test/ccmath_sqrt_test.cpp b/test/ccmath_sqrt_test.cpp
index af2911bb85..eb2cf039dc 100644
--- a/test/ccmath_sqrt_test.cpp
+++ b/test/ccmath_sqrt_test.cpp
@@ -4,6 +4,7 @@
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
 #include
+#include
 #include
 #include
 #include
diff --git a/test/check_cmake_version.cpp b/test/check_cmake_version.cpp
new file mode 100644
index 0000000000..2fd4648368
--- /dev/null
+++ b/test/check_cmake_version.cpp
@@ -0,0 +1,27 @@
+// Check whether the version in CMakeLists.txt is up to date
+//
+// Copyright 2018 Peter Dimov
+//
+// Distributed under the Boost Software License, Version 1.0.
+//
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+
+#include <boost/version.hpp>
+#include <boost/core/lightweight_test.hpp>
+#include <cstdio>
+
+int main( int ac, char const* av[] )
+{
+    BOOST_TEST_EQ( ac, 2 );
+
+    if( ac >= 2 )
+    {
+        char version[ 64 ];
+        std::sprintf( version, "%d.%d.%d", BOOST_VERSION / 100000, BOOST_VERSION / 100 % 1000, BOOST_VERSION % 100 );
+
+        BOOST_TEST_CSTR_EQ( av[1], version );
+    }
+
+    return boost::report_errors();
+}
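The std::sprintf call in check_cmake_version.cpp above splits BOOST_VERSION into its digit groups: dividing by 100000 yields the major version, dividing by 100 modulo 1000 the minor version, and modulo 100 the patch level. A minimal standalone sketch of the same arithmetic; the value 108600 is made up purely for illustration (it would correspond to a hypothetical Boost 1.86.0) and is not the real macro from <boost/version.hpp>:

#include <cstdio>

int main()
{
    const int fake_boost_version = 108600;       // illustrative value only
    std::printf("%d.%d.%d\n",
                fake_boost_version / 100000,     // major:  108600 / 100000      = 1
                fake_boost_version / 100 % 1000, // minor: (108600 / 100) % 1000 = 86
                fake_boost_version % 100);       // patch:  108600 % 100         = 0
    return 0;                                    // prints "1.86.0"
}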
diff --git a/test/compile_test/CMakeLists.txt b/test/compile_test/CMakeLists.txt
index d4d3a5dcdd..acfa292228 100644
--- a/test/compile_test/CMakeLists.txt
+++ b/test/compile_test/CMakeLists.txt
@@ -2,7 +2,6 @@
 # Distributed under the Boost Software License, Version 1.0.
 # https://www.boost.org/LICENSE_1_0.txt
 
-file(GLOB SOURCES "*.cpp")
-add_library(boost_math-compile_tests STATIC ${SOURCES})
-target_compile_features(boost_math-compile_tests PRIVATE cxx_std_17)
-target_link_libraries(boost_math-compile_tests PUBLIC Boost::math)
+include_directories(../../include_private)
+file(GLOB SRC_FILES CONFIGURE_DEPENDS "*.cpp")
+boost_test(TYPE "compile" SOURCES ${SRC_FILES} COMPILE_DEFINITIONS BOOST_MATH_STANDALONE COMPILE_FEATURES cxx_std_17 LINK_LIBRARIES Boost::math Boost::multiprecision Boost::numeric_ublas Boost::unit_test_framework )
diff --git a/test/compile_test/dist_holtsmark_incl_test.cpp b/test/compile_test/dist_holtsmark_incl_test.cpp
new file mode 100644
index 0000000000..f90bbb9f07
--- /dev/null
+++ b/test/compile_test/dist_holtsmark_incl_test.cpp
@@ -0,0 +1,26 @@
+// Copyright John Maddock 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Basic sanity check that header <boost/math/distributions/holtsmark.hpp>
+// #includes all the files that it needs to.
+//
+#define BOOST_MATH_ASSERT_UNDEFINED_POLICY false
+#include <boost/math/distributions/holtsmark.hpp>
+//
+// Note this header includes no other headers, this is
+// important if this test is to be meaningful:
+//
+#include "test_compile_result.hpp"
+
+void compile_and_link_test()
+{
+   TEST_DIST_FUNC(holtsmark)
+}
+
+template class boost::math::holtsmark_distribution<float, boost::math::policies::policy<> >;
+template class boost::math::holtsmark_distribution<double, boost::math::policies::policy<> >;
+#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
+template class boost::math::holtsmark_distribution<long double, boost::math::policies::policy<> >;
+#endif
diff --git a/test/compile_test/dist_landau_incl_test.cpp b/test/compile_test/dist_landau_incl_test.cpp
new file mode 100644
index 0000000000..5b63a710ab
--- /dev/null
+++ b/test/compile_test/dist_landau_incl_test.cpp
@@ -0,0 +1,26 @@
+// Copyright John Maddock 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Basic sanity check that header <boost/math/distributions/landau.hpp>
+// #includes all the files that it needs to.
+//
+#define BOOST_MATH_ASSERT_UNDEFINED_POLICY false
+#include <boost/math/distributions/landau.hpp>
+//
+// Note this header includes no other headers, this is
+// important if this test is to be meaningful:
+//
+#include "test_compile_result.hpp"
+
+void compile_and_link_test()
+{
+   TEST_DIST_FUNC(landau)
+}
+
+template class boost::math::landau_distribution<float, boost::math::policies::policy<> >;
+template class boost::math::landau_distribution<double, boost::math::policies::policy<> >;
+#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
+template class boost::math::landau_distribution<long double, boost::math::policies::policy<> >;
+#endif
diff --git a/test/compile_test/dist_mapairy_incl_test.cpp b/test/compile_test/dist_mapairy_incl_test.cpp
new file mode 100644
index 0000000000..7b953711dd
--- /dev/null
+++ b/test/compile_test/dist_mapairy_incl_test.cpp
@@ -0,0 +1,26 @@
+// Copyright John Maddock 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Basic sanity check that header <boost/math/distributions/mapairy.hpp>
+// #includes all the files that it needs to.
+//
+#define BOOST_MATH_ASSERT_UNDEFINED_POLICY false
+#include <boost/math/distributions/mapairy.hpp>
+//
+// Note this header includes no other headers, this is
+// important if this test is to be meaningful:
+//
+#include "test_compile_result.hpp"
+
+void compile_and_link_test()
+{
+   TEST_DIST_FUNC(mapairy)
+}
+
+template class boost::math::mapairy_distribution<float, boost::math::policies::policy<> >;
+template class boost::math::mapairy_distribution<double, boost::math::policies::policy<> >;
+#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
+template class boost::math::mapairy_distribution<long double, boost::math::policies::policy<> >;
+#endif
diff --git a/test/compile_test/dist_saspoint5_incl_test.cpp b/test/compile_test/dist_saspoint5_incl_test.cpp
new file mode 100644
index 0000000000..e48d3691df
--- /dev/null
+++ b/test/compile_test/dist_saspoint5_incl_test.cpp
@@ -0,0 +1,26 @@
+// Copyright John Maddock 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Basic sanity check that header <boost/math/distributions/saspoint5.hpp>
+// #includes all the files that it needs to.
+//
+#define BOOST_MATH_ASSERT_UNDEFINED_POLICY false
+#include <boost/math/distributions/saspoint5.hpp>
+//
+// Note this header includes no other headers, this is
+// important if this test is to be meaningful:
+//
+#include "test_compile_result.hpp"
+
+void compile_and_link_test()
+{
+   TEST_DIST_FUNC(saspoint5)
+}
+
+template class boost::math::saspoint5_distribution<float, boost::math::policies::policy<> >;
+template class boost::math::saspoint5_distribution<double, boost::math::policies::policy<> >;
+#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
+template class boost::math::saspoint5_distribution<long double, boost::math::policies::policy<> >;
+#endif
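Each of the four new *_incl_test.cpp translation units above ends with explicit template instantiations. The point of `template class X<T>;` is that it forces every member of X<T> to be compiled, so a header that forgot one of its own includes fails here even though compile_and_link_test() never calls most members. A minimal sketch of the idiom; toy_distribution is a hypothetical stand-in, not a Boost.Math type:

// Sketch only: toy_distribution is made up for illustration.
#include <cmath>

template <class RealType>
class toy_distribution
{
public:
   explicit toy_distribution(RealType scale) : m_scale(scale) {}
   // If <cmath> were missing above, this member would only fail to compile
   // when instantiated; the explicit instantiations below guarantee that
   // happens in this translation unit even with no callers.
   RealType pdf(RealType x) const { return std::exp(-x / m_scale) / m_scale; }
private:
   RealType m_scale;
};

// Forces compilation of every member for these types:
template class toy_distribution<float>;
template class toy_distribution<double>;

int main() { return 0; }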
diff --git a/test/compile_test/instantiate.hpp b/test/compile_test/instantiate.hpp
index bdb6d515cf..85c85f785d 100644
--- a/test/compile_test/instantiate.hpp
+++ b/test/compile_test/instantiate.hpp
@@ -28,6 +28,7 @@ template bool instantiate_mixed_runner_result::value;
 #include
 #include
+#include
 #include
 
 #if !defined(BOOST_MATH_NO_DISTRIBUTION_CONCEPT_TESTS)
@@ -73,6 +74,33 @@ BOOST_MATH_DECLARE_DISTRIBUTIONS(double, test_policy)
 # define TEST_GROUP_15
 #endif
 
+template <class RealType>
+void instantiate_for_fixed_precision_only(RealType, const std::true_type&)
+{
+   using namespace boost;
+   using namespace boost::math;
+   using namespace boost::math::concepts;
+
+#ifdef TEST_GROUP_1
+#if !defined(BOOST_MATH_NO_DISTRIBUTION_CONCEPT_TESTS)
+   function_requires<DistributionConcept<landau_distribution<RealType> > >();
+   function_requires<DistributionConcept<landau_distribution<RealType, test_policy> > >();
+   function_requires<DistributionConcept<dist_test::landau> >();
+   function_requires<DistributionConcept<mapairy_distribution<RealType> > >();
+   function_requires<DistributionConcept<mapairy_distribution<RealType, test_policy> > >();
+   function_requires<DistributionConcept<dist_test::mapairy> >();
+   function_requires<DistributionConcept<holtsmark_distribution<RealType> > >();
+   function_requires<DistributionConcept<holtsmark_distribution<RealType, test_policy> > >();
+   function_requires<DistributionConcept<dist_test::holtsmark> >();
+   function_requires<DistributionConcept<saspoint5_distribution<RealType> > >();
+   function_requires<DistributionConcept<saspoint5_distribution<RealType, test_policy> > >();
+   function_requires<DistributionConcept<dist_test::saspoint5> >();
+#endif
+#endif
+}
+template <class RealType>
+void instantiate_for_fixed_precision_only(RealType, const std::false_type&){}
+
 template <class RealType>
 void instantiate(RealType)
 {
@@ -118,6 +146,9 @@ void instantiate(RealType)
    function_requires > >();
    function_requires > >();
    function_requires > >();
+
+   instantiate_for_fixed_precision_only(RealType(), std::integral_constant<bool, std::numeric_limits<RealType>::is_specialized && (std::numeric_limits<RealType>::digits <= 113) && (std::numeric_limits<RealType>::radix == 2)>());
+
 #endif // !defined(BOOST_MATH_NO_DISTRIBUTION_CONCEPT_TESTS)
 #endif
 #ifndef BOOST_MATH_INSTANTIATE_MINIMUM
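The pair of instantiate_for_fixed_precision_only overloads above is classic std::integral_constant tag dispatch: the boolean is evaluated at compile time and only the body of the selected overload is instantiated, so the concept checks for the new distributions never touch types that fail the digits/radix test. A compilable sketch of the same idiom under hypothetical names (this is not the Boost.Math test harness itself):

// Minimal tag-dispatch sketch; all names are hypothetical.
#include <cstdio>
#include <limits>
#include <type_traits>

template <class T>
void checks_for_fixed_precision(T, const std::true_type&)
{
   std::printf("running fixed-precision-only checks\n");
}

template <class T>
void checks_for_fixed_precision(T, const std::false_type&) {} // no-op fallback

template <class T>
void run_checks(T value)
{
   // The condition is a compile-time constant, so overload resolution
   // instantiates exactly one of the two bodies for each T.
   checks_for_fixed_precision(value,
      std::integral_constant<bool,
         std::numeric_limits<T>::is_specialized &&
         (std::numeric_limits<T>::digits <= 113) &&
         (std::numeric_limits<T>::radix == 2)>());
}

struct widget {}; // std::numeric_limits is not specialized for this type

int main()
{
   run_checks(1.0);      // double: true_type branch fires
   run_checks(widget{}); // widget: false_type branch, nothing runs
   return 0;
}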
diff --git a/test/compile_test/tools_remez_inc_test.cpp b/test/compile_test/tools_remez_inc_test.cpp
index 938bd76615..98841b687a 100644
--- a/test/compile_test/tools_remez_inc_test.cpp
+++ b/test/compile_test/tools_remez_inc_test.cpp
@@ -8,5 +8,5 @@
 //
 #ifndef BOOST_MATH_STANDALONE
-#include <boost/math/tools/remez.hpp>
+#include "../../include_private/boost/math/tools/remez.hpp"
 #endif
diff --git a/test/compile_test/tools_solve_inc_test.cpp b/test/compile_test/tools_solve_inc_test.cpp
index ee5f79d37a..715cdcc37a 100644
--- a/test/compile_test/tools_solve_inc_test.cpp
+++ b/test/compile_test/tools_solve_inc_test.cpp
@@ -7,5 +7,5 @@
 // #includes all the files that it needs to.
 //
 #ifndef BOOST_MATH_STANDALONE
-#include <boost/math/tools/solve.hpp>
+#include "../../include_private/boost/math/tools/solve.hpp"
 #endif
diff --git a/test/compile_test/tools_test_data_inc_test.cpp b/test/compile_test/tools_test_data_inc_test.cpp
index 6be425d64d..66d4a1ccde 100644
--- a/test/compile_test/tools_test_data_inc_test.cpp
+++ b/test/compile_test/tools_test_data_inc_test.cpp
@@ -7,7 +7,7 @@
 // #includes all the files that it needs to.
 //
 #ifndef BOOST_MATH_STANDALONE
-#include <boost/math/tools/test_data.hpp>
+#include "../../include_private/boost/math/tools/test_data.hpp"
 
 #define T double
diff --git a/test/compile_test/tools_test_inc_test.cpp b/test/compile_test/tools_test_inc_test.cpp
index 10fea03a1d..01b0968d9b 100644
--- a/test/compile_test/tools_test_inc_test.cpp
+++ b/test/compile_test/tools_test_inc_test.cpp
@@ -9,7 +9,7 @@
 #include
 #ifndef BOOST_MATH_STANDALONE
-#include <boost/math/tools/test.hpp>
+#include "../../include_private/boost/math/tools/test.hpp"
 //
 // Note this header includes no other headers, this is
 // important if this test is to be meaningful:
diff --git a/test/cuda_jamfile b/test/cuda_jamfile
new file mode 100644
index 0000000000..02dcea8382
--- /dev/null
+++ b/test/cuda_jamfile
@@ -0,0 +1,389 @@
+# Copyright 2024 Matt Borland
+# Distributed under the Boost Software License, Version 1.0.
+# https://www.boost.org/LICENSE_1_0.txt
+
+import testing ;
+import ../../config/checks/config : requires ;
+
+project : requirements
+  [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ]
+  ;
+
+# Quad
+run test_exp_sinh_quad_float.cu ;
+run test_exp_sinh_quad_double.cu ;
+run test_sinh_sinh_quad_float.cu ;
+run test_sinh_sinh_quad_double.cu ;
+
+# Distributions
+run test_arcsine_cdf_double.cu ;
+run test_arcsine_cdf_float.cu ;
+run test_arcsine_pdf_double.cu ;
+run test_arcsine_pdf_float.cu ;
+run test_arcsine_quan_double.cu ;
+run test_arcsine_quan_float.cu ;
+run test_arcsine_range_support_double.cu ;
+
+run test_bernoulli_cdf_double.cu ;
+run test_bernoulli_cdf_float.cu ;
+run test_bernoulli_pdf_double.cu ;
+run test_bernoulli_pdf_float.cu ;
+run test_bernoulli_range_support_double.cu ;
+run test_bernoulli_range_support_float.cu ;
+
+run test_beta_dist_cdf_double.cu ;
+run test_beta_dist_cdf_float.cu ;
+run test_beta_dist_pdf_double.cu ;
+run test_beta_dist_pdf_float.cu ;
+run test_beta_dist_quan_double.cu ;
+run test_beta_dist_quan_float.cu ;
+
+run test_cauchy_cdf_double.cu ;
+run test_cauchy_cdf_float.cu ;
+run test_cauchy_pdf_double.cu ;
+run test_cauchy_pdf_float.cu ;
+run test_cauchy_quan_double.cu ;
+run test_cauchy_quan_float.cu ;
+run test_cauchy_range_support_double.cu ;
+run test_cauchy_range_support_float.cu ;
+
+run test_chi_squared_cdf_double.cu ;
+run test_chi_squared_cdf_float.cu ;
+run test_chi_squared_pdf_double.cu ;
+run test_chi_squared_pdf_float.cu ;
+run test_chi_squared_quan_double.cu ;
+run test_chi_squared_quan_float.cu ;
+
+run test_exponential_cdf_double.cu ;
+run test_exponential_cdf_float.cu ;
+run test_exponential_pdf_double.cu ;
+run test_exponential_pdf_float.cu ;
+run test_exponential_quan_double.cu ;
+run test_exponential_quan_float.cu ;
+run test_exponential_range_support_double.cu ;
+run test_exponential_range_support_float.cu ;
+
+run test_extreme_value_cdf_double.cu ;
+run test_extreme_value_cdf_float.cu ;
+run test_extreme_value_pdf_double.cu ;
+run test_extreme_value_pdf_float.cu ;
+run test_extreme_value_quan_double.cu ;
+run test_extreme_value_quan_float.cu ;
+
+run test_fisher_f_cdf_double.cu ;
+run test_fisher_f_cdf_float.cu ;
+run test_fisher_f_pdf_double.cu ;
+run test_fisher_f_pdf_float.cu ;
+run test_fisher_f_quan_double.cu ;
+run test_fisher_f_quan_float.cu ;
+
+run test_gamma_dist_cdf_double.cu ;
+run test_gamma_dist_cdf_float.cu ;
+run test_gamma_dist_pdf_double.cu ;
+run test_gamma_dist_pdf_float.cu ;
+run test_gamma_dist_quan_double.cu ;
+run test_gamma_dist_quan_float.cu ;
+
+run test_geometric_dist_cdf_double.cu ;
+run test_geometric_dist_cdf_float.cu ;
+run test_geometric_dist_pdf_double.cu ;
+run test_geometric_dist_pdf_float.cu ;
+run test_geometric_dist_quan_double.cu ;
+run test_geometric_dist_quan_float.cu ;
+
+run test_holtsmark_cdf_double.cu ;
+run test_holtsmark_cdf_float.cu ;
+run test_holtsmark_pdf_double.cu ;
+run test_holtsmark_pdf_float.cu ;
+
+run test_inverse_chi_squared_cdf_double.cu ;
+run test_inverse_chi_squared_cdf_float.cu ;
+run test_inverse_chi_squared_pdf_double.cu ;
+run test_inverse_chi_squared_pdf_float.cu ;
+run test_inverse_chi_squared_quan_double.cu ;
+run test_inverse_chi_squared_quan_float.cu ;
+
+run test_inverse_gamma_cdf_double.cu ;
+run test_inverse_gamma_cdf_float.cu ;
+run test_inverse_gamma_pdf_double.cu ;
+run test_inverse_gamma_pdf_float.cu ;
+run test_inverse_gamma_quan_double.cu ;
+run test_inverse_gamma_quan_float.cu ;
+
+run test_inverse_gaussian_cdf_double.cu ;
+run test_inverse_gaussian_cdf_float.cu ;
+run test_inverse_gaussian_pdf_double.cu ;
+run test_inverse_gaussian_pdf_float.cu ;
+run test_inverse_gaussian_quan_double.cu ;
+run test_inverse_gaussian_quan_float.cu ;
+
+run test_landau_cdf_double.cu ;
+run test_landau_cdf_float.cu ;
+run test_landau_pdf_double.cu ;
+run test_landau_pdf_float.cu ;
+run test_landau_quan_double.cu ;
+run test_landau_quan_float.cu ;
+
+run test_laplace_cdf_double.cu ;
+run test_laplace_cdf_float.cu ;
+run test_laplace_pdf_double.cu ;
+run test_laplace_pdf_float.cu ;
+run test_laplace_quan_double.cu ;
+run test_laplace_quan_float.cu ;
+
+run test_logistic_cdf_double.cu ;
+run test_logistic_cdf_float.cu ;
+run test_logistic_pdf_double.cu ;
+run test_logistic_pdf_float.cu ;
+run test_logistic_quan_double.cu ;
+run test_logistic_quan_float.cu ;
+
+run test_lognormal_cdf_double.cu ;
+run test_lognormal_cdf_float.cu ;
+run test_lognormal_pdf_double.cu ;
+run test_lognormal_pdf_float.cu ;
+run test_lognormal_quan_double.cu ;
+run test_lognormal_quan_float.cu ;
+
+run test_mapairy_cdf_double.cu ;
+run test_mapairy_cdf_float.cu ;
+run test_mapairy_pdf_double.cu ;
+run test_mapairy_pdf_float.cu ;
+run test_mapairy_quan_double.cu ;
+run test_mapairy_quan_float.cu ;
+
+run test_nc_beta_cdf_double.cu ;
+run test_nc_beta_cdf_float.cu ;
+run test_nc_beta_pdf_double.cu ;
+run test_nc_beta_pdf_float.cu ;
+run test_nc_beta_quan_double.cu ;
+run test_nc_beta_quan_float.cu ;
+
+run test_nc_f_cdf_double.cu ;
+run test_nc_f_cdf_float.cu ;
+run test_nc_f_pdf_double.cu ;
+run test_nc_f_pdf_float.cu ;
+run test_nc_f_quan_double.cu ;
+run test_nc_f_quan_float.cu ;
+
+run test_nc_chi_squared_cdf_double.cu ;
+run test_nc_chi_squared_cdf_float.cu ;
+run test_nc_chi_squared_pdf_double.cu ;
+run test_nc_chi_squared_pdf_float.cu ;
+run test_nc_chi_squared_quan_double.cu ;
+run test_nc_chi_squared_quan_float.cu ;
+
+run test_negative_binomial_cdf_double.cu ;
+run test_negative_binomial_cdf_float.cu ;
+run test_negative_binomial_pdf_double.cu ;
+run test_negative_binomial_pdf_float.cu ;
+run test_negative_binomial_quan_double.cu ;
+run test_negative_binomial_quan_float.cu ;
+
+run test_normal_cdf_double.cu ;
+run test_normal_cdf_float.cu ;
+run test_normal_pdf_double.cu ;
+run test_normal_pdf_float.cu ;
+run test_normal_quan_double.cu ;
+run test_normal_quan_float.cu ;
+
+run test_pareto_cdf_double.cu ;
+run test_pareto_cdf_float.cu ;
+run test_pareto_pdf_double.cu ;
+run test_pareto_pdf_float.cu ;
+run test_pareto_quan_double.cu ;
+run test_pareto_quan_float.cu ;
+
+run test_poisson_cdf_double.cu ;
+run test_poisson_cdf_float.cu ;
+run test_poisson_pdf_double.cu ;
+run test_poisson_pdf_float.cu ;
+run test_poisson_quan_double.cu ;
+run test_poisson_quan_float.cu ;
+
+run test_rayleigh_cdf_double.cu ;
+run test_rayleigh_cdf_float.cu ;
+run test_rayleigh_pdf_double.cu ;
+run test_rayleigh_pdf_float.cu ;
+run test_rayleigh_quan_double.cu ;
+run test_rayleigh_quan_float.cu ;
+
+run test_saspoint5_cdf_double.cu ;
+run test_saspoint5_cdf_float.cu ;
+run test_saspoint5_pdf_double.cu ;
+run test_saspoint5_pdf_float.cu ;
+run test_saspoint5_quan_double.cu ;
+run test_saspoint5_quan_float.cu ;
+
+run test_students_t_cdf_double.cu ;
+run test_students_t_cdf_float.cu ;
+run test_students_t_pdf_double.cu ;
+run test_students_t_pdf_float.cu ;
+run test_students_t_quan_double.cu ;
+run test_students_t_quan_float.cu ;
+
+run test_triangular_cdf_double.cu ;
+run test_triangular_cdf_float.cu ;
+run test_triangular_pdf_double.cu ;
+run test_triangular_pdf_float.cu ;
+run test_triangular_quan_double.cu ;
+run test_triangular_quan_float.cu ;
+
+run test_uniform_cdf_double.cu ;
+run test_uniform_cdf_float.cu ;
+run test_uniform_pdf_double.cu ;
+run test_uniform_pdf_float.cu ;
+run test_uniform_quan_double.cu ;
+run test_uniform_quan_float.cu ;
+
+run test_weibull_cdf_double.cu ;
+run test_weibull_cdf_float.cu ;
+run test_weibull_pdf_double.cu ;
+run test_weibull_pdf_float.cu ;
+run test_weibull_quan_double.cu ;
+run test_weibull_quan_float.cu ;
+
+# Special Functions
+run test_airy_ai_double.cu ;
+run test_airy_ai_float.cu ;
+run test_airy_ai_prime_double.cu ;
+run test_airy_ai_prime_float.cu ;
+run test_airy_bi_double.cu ;
+run test_airy_bi_float.cu ;
+run test_airy_bi_prime_double.cu ;
+run test_airy_bi_prime_float.cu ;
+
+run test_beta_double.cu ;
+run test_beta_float.cu ;
+run test_betac_double.cu ;
+run test_betac_float.cu ;
+run test_ibeta_double.cu ;
+run test_ibeta_float.cu ;
+run test_ibeta_derivative_double.cu ;
+run test_ibeta_derivative_float.cu ;
+run test_ibeta_inv_double.cu ;
+run test_ibeta_inv_float.cu ;
+run test_ibeta_inva_double.cu ;
+run test_ibeta_inva_float.cu ;
+run test_ibeta_invb_double.cu ;
+run test_ibeta_invb_float.cu ;
+run test_ibetac_inv_double.cu ;
+run test_ibetac_inv_float.cu ;
+run test_ibetac_inva_double.cu ;
+run test_ibetac_inva_float.cu ;
+run test_ibetac_invb_double.cu ;
+run test_ibetac_invb_float.cu ;
+
+run test_bessel_i0_double.cu ;
+run test_bessel_i0_float.cu ;
+run test_bessel_i1_double.cu ;
+run test_bessel_i1_float.cu ;
+run test_bessel_j0_double.cu ;
+run test_bessel_j0_float.cu ;
+run test_bessel_j1_double.cu ;
+run test_bessel_j1_float.cu ;
+run test_bessel_k0_double.cu ;
+run test_bessel_k0_float.cu ;
+run test_bessel_k1_double.cu ;
+run test_bessel_k1_float.cu ;
+run test_bessel_kn_double.cu ;
+run test_bessel_kn_float.cu ;
+run test_bessel_y0_double.cu ;
+run test_bessel_y0_float.cu ;
+run test_bessel_y1_double.cu ;
+run test_bessel_y1_float.cu ;
+run test_cyl_bessel_i_double.cu ;
+run test_cyl_bessel_i_float.cu ;
+run test_cyl_bessel_j_double.cu ;
+run test_cyl_bessel_j_float.cu ;
+run test_cyl_bessel_k_double.cu ;
+run test_cyl_bessel_k_float.cu ;
+run test_sph_bessel_double.cu ;
+run test_sph_bessel_float.cu ;
+run test_cyl_neumann_double.cu ;
+run test_cyl_neumann_float.cu ;
+run test_sph_neumann_double.cu ;
+run test_sph_neumann_float.cu ;
+run test_cyl_hankel_1_double.cu ;
+run test_cyl_hankel_1_float.cu ;
+run test_cyl_hankel_2_double.cu ;
+run test_cyl_hankel_2_float.cu ;
+run test_sph_hankel_1_double.cu ;
+run test_sph_hankel_1_float.cu ;
+run test_sph_hankel_2_double.cu ;
+run test_sph_hankel_2_float.cu ;
+
+run test_cbrt_double.cu ;
+run test_cbrt_float.cu ;
+
+run test_changesign_double.cu ;
+run test_changesign_float.cu ;
+
+run test_cos_pi_double.cu ;
+run test_cos_pi_float.cu ;
+
+run test_digamma_double.cu ;
+run test_digamma_float.cu ;
+
+run test_ellint_1_double.cu ;
+run test_ellint_1_float.cu ;
+run test_ellint_2_double.cu ;
+run test_ellint_2_float.cu ;
+run test_ellint_3_double.cu ;
+run test_ellint_3_float.cu ;
+run test_ellint_d_double.cu ;
+run test_ellint_d_float.cu ;
+run test_jacobi_zeta_double.cu ;
+run test_jacobi_zeta_float.cu ;
+run test_heuman_lambda_double.cu ;
+run test_heuman_lambda_float.cu ;
+
+run test_erf_double.cu ;
+run test_erf_float.cu ;
+run test_erf_inv_double.cu ;
+run test_erf_inv_float.cu ;
+run test_erfc_double.cu ;
+run test_erfc_float.cu ;
+run test_erfc_inv_double.cu ;
+run test_erfc_inv_float.cu ;
+
+run test_expint_double.cu ;
+run test_expint_float.cu ;
+
+run test_expm1_double.cu ;
+run test_expm1_float.cu ;
+
+run test_gegenbauer_double.cu ;
+run test_gegenbauer_float.cu ;
+
+run test_hermite_double.cu ;
+run test_hermite_float.cu ;
+
+run test_lgamma_double.cu ;
+run test_lgamma_float.cu ;
+run test_tgamma_double.cu ;
+run test_tgamma_float.cu ;
+run test_tgamma_ratio_double.cu ;
+run test_tgamma_ratio_float.cu ;
+run test_gamma_p_derivative_double.cu ;
+run test_gamma_p_derivative_float.cu ;
+run test_gamma_p_inv_double.cu ;
+run test_gamma_p_inv_float.cu ;
+
+run test_log1p_double.cu ;
+run test_log1p_float.cu ;
+
+run test_modf_double.cu ;
+run test_modf_float.cu ;
+
+run test_round_double.cu ;
+run test_round_float.cu ;
+
+run test_sin_pi_double.cu ;
+run test_sin_pi_float.cu ;
+
+run test_trigamma_double.cu ;
+run test_trigamma_float.cu ;
+
+run test_trunc_double.cu ;
+run test_trunc_float.cu ;
error=" << cudaGetErrorString(err) << std::endl; + } + } + } +}; + +int cudaResetter::count = 0; + +cudaResetter global_resetter; + +template +class cuda_managed_ptr +{ + T* data; + static const cudaResetter resetter; + cuda_managed_ptr(const cuda_managed_ptr&) = delete; + cuda_managed_ptr& operator=(cuda_managed_ptr const&) = delete; + void free() + { + if(data) + { + cudaDeviceSynchronize(); + cudaError_t err = cudaFree(data); + if(err != cudaSuccess) + { + std::cerr << "Failed to deinitialize the device! error=" << cudaGetErrorString(err) << std::endl; + } + } + } +public: + cuda_managed_ptr() : data(0) {} + cuda_managed_ptr(std::size_t n) + { + cudaError_t err = cudaSuccess; + void *ptr; + err = cudaMallocManaged(&ptr, n * sizeof(T)); + if(err != cudaSuccess) + throw std::runtime_error(cudaGetErrorString(err)); + cudaDeviceSynchronize(); + data = static_cast(ptr); + } + cuda_managed_ptr(cuda_managed_ptr&& o) + { + data = o.data; + o.data = 0; + } + cuda_managed_ptr& operator=(cuda_managed_ptr&& o) + { + free(); + data = o.data; + o.data = 0; + return *this; + } + ~cuda_managed_ptr() + { + free(); + } + + class managed_holder : managed_holder_base + { + T* pdata; + public: + managed_holder(T* p) : managed_holder_base(), pdata(p) {} + managed_holder(const managed_holder& o) : managed_holder_base(), pdata(o.pdata) {} + operator T* () { return pdata; } + T& operator[] (std::size_t n) { return pdata[n]; } + }; + class const_managed_holder : managed_holder_base + { + const T* pdata; + public: + const_managed_holder(T* p) : managed_holder_base(), pdata(p) {} + const_managed_holder(const managed_holder& o) : managed_holder_base(), pdata(o.pdata) {} + operator const T* () { return pdata; } + const T& operator[] (std::size_t n) { return pdata[n]; } + }; + + managed_holder get() { return managed_holder(data); } + const_managed_holder get()const { return data; } + T& operator[](std::size_t n) { return data[n]; } + const T& operator[](std::size_t n)const { return data[n]; } +}; + +template +cudaResetter const cuda_managed_ptr::resetter; + +#endif diff --git a/test/daubechies_scaling_test.cpp b/test/daubechies_scaling_test.cpp index 3fd0937ece..e2dda727e4 100644 --- a/test/daubechies_scaling_test.cpp +++ b/test/daubechies_scaling_test.cpp @@ -297,8 +297,8 @@ void test_first_derivative() // Limited precision test data means we can't test long double here... 
diff --git a/test/daubechies_scaling_test.cpp b/test/daubechies_scaling_test.cpp
index 3fd0937ece..e2dda727e4 100644
--- a/test/daubechies_scaling_test.cpp
+++ b/test/daubechies_scaling_test.cpp
@@ -297,8 +297,8 @@ void test_first_derivative()
 // Limited precision test data means we can't test long double here...
 #else
     auto phi1_3 = boost::math::detail::daubechies_scaling_integer_grid<long double, 3, 1>();
-    std::array<long double, 6> lin_3{0.0L, 1.638452340884085725014976L, -2.232758190463137395017742L,
-        0.5501593582740176149905562L, 0.04414649130503405501220997L, 0.0L};
+    std::array<long double, 6> lin_3{0.0L, 1.638452340884085725014976113635604107L, -2.23275819046313739501774225255380757L,
+        0.550159358274017614990556164200803310L, 0.044146491305034055012209974717400368L, 0.0L};
     for (size_t i = 0; i < lin_3.size(); ++i)
     {
         if(!CHECK_ULP_CLOSE(lin_3[i], phi1_3[i], 0))
@@ -308,8 +308,8 @@ void test_first_derivative()
     }
 
     auto phi1_4 = boost::math::detail::daubechies_scaling_integer_grid<long double, 4, 1>();
-    std::array<long double, 8> lin_4 = {0.0L, 1.776072007522184640093776L, -2.785349397229543142492785L, 1.192452536632278174347632L,
-        -0.1313745151846729587935189L, -0.05357102822023923595359996L,0.001770396479992522798495351L, 0.0L};
+    std::array<long double, 8> lin_4 = {0.0L, 1.776072007522184640093776071522502761L, -2.785349397229543142492784905731245880L, 1.192452536632278174347632339082851360L,
+        -0.131374515184672958793518896272545740L, -0.053571028220239235953599959390993709L,0.001770396479992522798495350789431024L, 0.0L};
     for (size_t i = 0; i < lin_4.size(); ++i)
     {
@@ -319,8 +319,8 @@ void test_first_derivative()
     }
 }
 
-    std::array<long double, 10> lin_5 = {0.0L, 1.558326313047001366564379L, -2.436012783189551921436896L, 1.235905129801454293947039L, -0.3674377136938866359947561L,
-        -0.02178035117564654658884556L,0.03234719350814368885815854L,-0.001335619912770701035229331L,-0.00001216838474354431384970525L,0.0L};
+    std::array<long double, 10> lin_5 = {0.0L, 1.558326313047001366564379221011472479L, -2.436012783189551921436895932290077033L, 1.235905129801454293947038906779457610L, -0.367437713693886635994756136622838186L,
+        -0.021780351175646546588845564309594589L,0.032347193508143688858158541500450925L,-0.001335619912770701035229330817898250L,-0.000012168384743544313849705250972915L,0.0L};
     auto phi1_5 = boost::math::detail::daubechies_scaling_integer_grid<long double, 5, 1>();
     for (size_t i = 0; i < lin_5.size(); ++i)
     {
diff --git a/test/float128/log1p_expm1_test.cpp b/test/float128/log1p_expm1_test.cpp
index 7948614403..c46d5b099a 100644
--- a/test/float128/log1p_expm1_test.cpp
+++ b/test/float128/log1p_expm1_test.cpp
@@ -14,7 +14,7 @@
 
 #include "table_type.hpp"
 
-#include "libs/math/test/log1p_expm1_test.hpp"
+#include "log1p_expm1_test.hpp"
 
 //
 // DESCRIPTION:
diff --git a/test/float128/powm1_sqrtp1m1_test.cpp b/test/float128/powm1_sqrtp1m1_test.cpp
index 73972bb7a0..bfc219bcc6 100644
--- a/test/float128/powm1_sqrtp1m1_test.cpp
+++ b/test/float128/powm1_sqrtp1m1_test.cpp
@@ -16,7 +16,7 @@
 
 #include "table_type.hpp"
 
-#include "libs/math/test/powm1_sqrtp1m1_test.hpp"
+#include "powm1_sqrtp1m1_test.hpp"
 
 //
 // DESCRIPTION:
diff --git a/test/float128/table_type.hpp b/test/float128/table_type.hpp
index 6560762db4..7e5c07b248 100644
--- a/test/float128/table_type.hpp
+++ b/test/float128/table_type.hpp
@@ -5,7 +5,7 @@
 
 #ifndef BOOST_MP_TABLE_TYPE
 
-#include
+#include
 
 #define SC_(x) BOOST_FLOATMAX_C(x)
diff --git a/test/float128/test_bessel_i.cpp b/test/float128/test_bessel_i.cpp
index 952cc9d6a6..7e0374cdfc 100644
--- a/test/float128/test_bessel_i.cpp
+++ b/test/float128/test_bessel_i.cpp
@@ -12,7 +12,7 @@
 
 #include "table_type.hpp"
 #include
 
-#include "libs/math/test/test_bessel_i.hpp"
+#include "test_bessel_i.hpp"
 
 void expected_results()
 {
diff --git a/test/float128/test_bessel_j.cpp b/test/float128/test_bessel_j.cpp
index 7afeeebeb4..f3bab11fd7 100644
--- a/test/float128/test_bessel_j.cpp
+++ b/test/float128/test_bessel_j.cpp
@@ -12,7 +12,7 @@
 
 #include
"table_type.hpp" #include -#include "libs/math/test/test_bessel_j.hpp" +#include "test_bessel_j.hpp" void expected_results() { diff --git a/test/float128/test_bessel_k.cpp b/test/float128/test_bessel_k.cpp index a5ec1e2b4e..7f7144649f 100644 --- a/test/float128/test_bessel_k.cpp +++ b/test/float128/test_bessel_k.cpp @@ -12,7 +12,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_bessel_k.hpp" +#include "test_bessel_k.hpp" void expected_results() { diff --git a/test/float128/test_bessel_y.cpp b/test/float128/test_bessel_y.cpp index 55bdf56e71..240d7785a1 100644 --- a/test/float128/test_bessel_y.cpp +++ b/test/float128/test_bessel_y.cpp @@ -12,7 +12,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_bessel_y.hpp" +#include "test_bessel_y.hpp" void expected_results() { diff --git a/test/float128/test_beta.cpp b/test/float128/test_beta.cpp index 6cfddd566a..ecdf347964 100644 --- a/test/float128/test_beta.cpp +++ b/test/float128/test_beta.cpp @@ -12,7 +12,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_beta.hpp" +#include "test_beta.hpp" void expected_results() { diff --git a/test/float128/test_binomial_coeff.cpp b/test/float128/test_binomial_coeff.cpp index be208f4f55..392150b06f 100644 --- a/test/float128/test_binomial_coeff.cpp +++ b/test/float128/test_binomial_coeff.cpp @@ -12,7 +12,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_binomial_coeff.hpp" +#include "test_binomial_coeff.hpp" void expected_results() { diff --git a/test/float128/test_carlson.cpp b/test/float128/test_carlson.cpp index 1458493732..0954816c36 100644 --- a/test/float128/test_carlson.cpp +++ b/test/float128/test_carlson.cpp @@ -11,7 +11,7 @@ #include #include #include -#include "libs/math/test/test_carlson.hpp" +#include "test_carlson.hpp" void expected_results() { diff --git a/test/float128/test_cbrt.cpp b/test/float128/test_cbrt.cpp index d6690bdd98..7b53b8a434 100644 --- a/test/float128/test_cbrt.cpp +++ b/test/float128/test_cbrt.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_cbrt.hpp" +#include "test_cbrt.hpp" void expected_results() { diff --git a/test/float128/test_digamma.cpp b/test/float128/test_digamma.cpp index 9856223bef..5702ccf73b 100644 --- a/test/float128/test_digamma.cpp +++ b/test/float128/test_digamma.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_digamma.hpp" +#include "test_digamma.hpp" void expected_results() { diff --git a/test/float128/test_ellint_1.cpp b/test/float128/test_ellint_1.cpp index 5c259e9405..90f9e1bed5 100644 --- a/test/float128/test_ellint_1.cpp +++ b/test/float128/test_ellint_1.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_ellint_1.hpp" +#include "test_ellint_1.hpp" void expected_results() { diff --git a/test/float128/test_ellint_2.cpp b/test/float128/test_ellint_2.cpp index 8b05124670..3f43e8c1dc 100644 --- a/test/float128/test_ellint_2.cpp +++ b/test/float128/test_ellint_2.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_ellint_2.hpp" +#include "test_ellint_2.hpp" void expected_results() { diff --git a/test/float128/test_ellint_3.cpp b/test/float128/test_ellint_3.cpp index e462683888..644dd1cbb8 100644 --- a/test/float128/test_ellint_3.cpp +++ b/test/float128/test_ellint_3.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_ellint_3.hpp" +#include "test_ellint_3.hpp" void expected_results() { diff --git 
a/test/float128/test_erf.cpp b/test/float128/test_erf.cpp index 90efd139e7..5f3bcaa46a 100644 --- a/test/float128/test_erf.cpp +++ b/test/float128/test_erf.cpp @@ -8,7 +8,7 @@ #define TEST_UDT #include -#include "libs/math/test/test_erf.hpp" +#include "test_erf.hpp" void expected_results() { diff --git a/test/float128/test_expint.cpp b/test/float128/test_expint.cpp index 73a1f6bb5f..ff9ece822d 100644 --- a/test/float128/test_expint.cpp +++ b/test/float128/test_expint.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_expint.hpp" +#include "test_expint.hpp" void expected_results() { diff --git a/test/float128/test_gamma.cpp b/test/float128/test_gamma.cpp index ddaae2adf0..bcec76083e 100644 --- a/test/float128/test_gamma.cpp +++ b/test/float128/test_gamma.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_gamma.hpp" +#include "test_gamma.hpp" void expected_results() { diff --git a/test/float128/test_hermite.cpp b/test/float128/test_hermite.cpp index f933c6d27f..3b6fcdfb5d 100644 --- a/test/float128/test_hermite.cpp +++ b/test/float128/test_hermite.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_hermite.hpp" +#include "test_hermite.hpp" void expected_results() { diff --git a/test/float128/test_ibeta.cpp b/test/float128/test_ibeta.cpp index 708a6950de..c46da77665 100644 --- a/test/float128/test_ibeta.cpp +++ b/test/float128/test_ibeta.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_ibeta.hpp" +#include "test_ibeta.hpp" void expected_results() { diff --git a/test/float128/test_ibeta_inv_1.cpp b/test/float128/test_ibeta_inv_1.cpp index 68049024f3..2fc059740c 100644 --- a/test/float128/test_ibeta_inv_1.cpp +++ b/test/float128/test_ibeta_inv_1.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_ibeta_inv.hpp" +#include "test_ibeta_inv.hpp" void expected_results() { diff --git a/test/float128/test_ibeta_inv_ab_4.cpp b/test/float128/test_ibeta_inv_ab_4.cpp index 3e0bc85816..d02a99f26f 100644 --- a/test/float128/test_ibeta_inv_ab_4.cpp +++ b/test/float128/test_ibeta_inv_ab_4.cpp @@ -11,7 +11,7 @@ #define FULL_TEST #include -#include "libs/math/test/test_ibeta_inv_ab.hpp" +#include "test_ibeta_inv_ab.hpp" void expected_results() { diff --git a/test/float128/test_igamma.cpp b/test/float128/test_igamma.cpp index d533254841..7a987c643e 100644 --- a/test/float128/test_igamma.cpp +++ b/test/float128/test_igamma.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_igamma.hpp" +#include "test_igamma.hpp" void expected_results() { diff --git a/test/float128/test_igamma_inv.cpp b/test/float128/test_igamma_inv.cpp index 122db9d42e..bc9b4289e2 100644 --- a/test/float128/test_igamma_inv.cpp +++ b/test/float128/test_igamma_inv.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_igamma_inv.hpp" +#include "test_igamma_inv.hpp" void expected_results() { diff --git a/test/float128/test_igamma_inva.cpp b/test/float128/test_igamma_inva.cpp index 0a244c2f45..19daa7cfa7 100644 --- a/test/float128/test_igamma_inva.cpp +++ b/test/float128/test_igamma_inva.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_igamma_inva.hpp" +#include "test_igamma_inva.hpp" void expected_results() { diff --git a/test/float128/test_laguerre.cpp b/test/float128/test_laguerre.cpp index 04ae016b94..dfa475448c 100644 --- a/test/float128/test_laguerre.cpp +++ 
b/test/float128/test_laguerre.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_laguerre.hpp" +#include "test_laguerre.hpp" void expected_results() { diff --git a/test/float128/test_legendre.cpp b/test/float128/test_legendre.cpp index 463c2d90a7..e6de9644a7 100644 --- a/test/float128/test_legendre.cpp +++ b/test/float128/test_legendre.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_legendre.hpp" +#include "test_legendre.hpp" void expected_results() { diff --git a/test/float128/test_polygamma.cpp b/test/float128/test_polygamma.cpp index e62cef9af6..1ff41e34f1 100644 --- a/test/float128/test_polygamma.cpp +++ b/test/float128/test_polygamma.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_polygamma.hpp" +#include "test_polygamma.hpp" void expected_results() { diff --git a/test/float128/test_tgamma_ratio.cpp b/test/float128/test_tgamma_ratio.cpp index 9d2568f558..0138ce472f 100644 --- a/test/float128/test_tgamma_ratio.cpp +++ b/test/float128/test_tgamma_ratio.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_tgamma_ratio.hpp" +#include "test_tgamma_ratio.hpp" void expected_results() { diff --git a/test/float128/test_trigamma.cpp b/test/float128/test_trigamma.cpp index 8862b53d00..3ab2f9473b 100644 --- a/test/float128/test_trigamma.cpp +++ b/test/float128/test_trigamma.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_trigamma.hpp" +#include "test_trigamma.hpp" void expected_results() { diff --git a/test/float128/test_zeta.cpp b/test/float128/test_zeta.cpp index 4dcdf19cc1..67aebfbff5 100644 --- a/test/float128/test_zeta.cpp +++ b/test/float128/test_zeta.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_zeta.hpp" +#include "test_zeta.hpp" void expected_results() { diff --git a/test/git_issue_1175.cpp b/test/git_issue_1175.cpp new file mode 100644 index 0000000000..9770acf537 --- /dev/null +++ b/test/git_issue_1175.cpp @@ -0,0 +1,25 @@ +// (C) Copyright Matt Borland 2023. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "math_unit_test.hpp" +#include +#include + +using namespace std; +using boost::math::beta_distribution; + +int main(int argc, char* argv[]) +{ + double a = 5.0; + double b = 5.0; + double p = 0.5; + + beta_distribution<> dist(a, b); + double x = quantile(dist, p); + + CHECK_ULP_CLOSE(x, 0.5, 2); + + return boost::math::test::report_errors(); +} diff --git a/test/git_issue_1194.cpp b/test/git_issue_1194.cpp new file mode 100644 index 0000000000..1c364a0c4d --- /dev/null +++ b/test/git_issue_1194.cpp @@ -0,0 +1,41 @@ +// (C) Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "math_unit_test.hpp" +#include +#include + +int main() +{ + using c99_error_policy = ::boost::math::policies::policy< + ::boost::math::policies::domain_error< ::boost::math::policies::errno_on_error>, + ::boost::math::policies::pole_error< ::boost::math::policies::errno_on_error>, + ::boost::math::policies::overflow_error< ::boost::math::policies::errno_on_error>, + ::boost::math::policies::evaluation_error< ::boost::math::policies::errno_on_error>, + ::boost::math::policies::rounding_error< ::boost::math::policies::errno_on_error> >; + + double val = -std::numeric_limits::infinity(); + + val = boost::math::tgamma(val, c99_error_policy()); + CHECK_EQUAL(errno, EDOM); + + val = std::numeric_limits::quiet_NaN(); + val = boost::math::tgamma(val, c99_error_policy()); + CHECK_EQUAL(errno, EDOM); + + val = std::numeric_limits::infinity(); + val = boost::math::tgamma(val, c99_error_policy()); + CHECK_EQUAL(errno, ERANGE); + + val = 0; + val = boost::math::tgamma(val, c99_error_policy()); + CHECK_EQUAL(errno, EDOM); // OK + + val = -2; + val = boost::math::tgamma(val, c99_error_policy()); + CHECK_EQUAL(errno, EDOM); // OK + + return boost::math::test::report_errors(); +} diff --git a/test/handle_test_result.hpp b/test/handle_test_result.hpp index e909d64583..66bfe557b1 100644 --- a/test/handle_test_result.hpp +++ b/test/handle_test_result.hpp @@ -6,8 +6,8 @@ #ifndef BOOST_MATH_HANDLE_TEST_RESULT #define BOOST_MATH_HANDLE_TEST_RESULT +#include "../include_private/boost/math/tools/test.hpp" #include -#include #include #include #include diff --git a/test/nvrtc_jamfile b/test/nvrtc_jamfile new file mode 100644 index 0000000000..e049a24d2b --- /dev/null +++ b/test/nvrtc_jamfile @@ -0,0 +1,388 @@ +# Copyright 2024 Matt Borland +# Distributed under the Boost Software License, Version 1.0. 
+# https://www.boost.org/LICENSE_1_0.txt + +import testing ; +import ../../config/checks/config : requires ; + +project : requirements + [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ] + ; + +# Quad +run test_exp_sinh_quad_nvrtc_float.cpp ; +run test_exp_sinh_quad_nvrtc_double.cpp ; +run test_sinh_sinh_quad_nvrtc_float.cpp ; +run test_sinh_sinh_quad_nvrtc_double.cpp ; + +# Distributions +run test_arcsine_cdf_nvrtc_double.cpp ; +run test_arcsine_cdf_nvrtc_float.cpp ; +run test_arcsine_pdf_nvrtc_double.cpp ; +run test_arcsine_pdf_nvrtc_float.cpp ; +run test_arcsine_quantile_nvrtc_double.cpp ; +run test_arcsine_quantile_nvrtc_float.cpp ; + +run test_bernoulli_cdf_nvrtc_double.cpp ; +run test_bernoulli_cdf_nvrtc_float.cpp ; +run test_bernoulli_pdf_nvrtc_double.cpp ; +run test_bernoulli_pdf_nvrtc_float.cpp ; +run test_bernoulli_quan_nvrtc_double.cpp ; +run test_bernoulli_quan_nvrtc_float.cpp ; + +run test_beta_dist_cdf_nvrtc_double.cpp ; +run test_beta_dist_cdf_nvrtc_float.cpp ; +run test_beta_dist_pdf_nvrtc_double.cpp ; +run test_beta_dist_pdf_nvrtc_float.cpp ; +run test_beta_dist_quan_nvrtc_double.cpp ; +run test_beta_dist_quan_nvrtc_float.cpp ; + +run test_cauchy_cdf_nvrtc_double.cpp ; +run test_cauchy_cdf_nvrtc_float.cpp ; +run test_cauchy_pdf_nvrtc_double.cpp ; +run test_cauchy_pdf_nvrtc_float.cpp ; +run test_cauchy_quan_nvrtc_double.cpp ; +run test_cauchy_quan_nvrtc_float.cpp ; + +run test_chi_squared_cdf_nvrtc_double.cpp ; +run test_chi_squared_cdf_nvrtc_float.cpp ; +run test_chi_squared_pdf_nvrtc_double.cpp ; +run test_chi_squared_pdf_nvrtc_float.cpp ; +run test_chi_squared_quan_nvrtc_double.cpp ; +run test_chi_squared_quan_nvrtc_float.cpp ; + +run test_exponential_cdf_nvrtc_double.cpp ; +run test_exponential_cdf_nvrtc_float.cpp ; +run test_exponential_pdf_nvrtc_double.cpp ; +run test_exponential_pdf_nvrtc_float.cpp ; +run test_exponential_quan_nvrtc_double.cpp ; +run test_exponential_quan_nvrtc_float.cpp ; + +run test_extreme_value_cdf_nvrtc_double.cpp ; +run test_extreme_value_cdf_nvrtc_float.cpp ; +run test_extreme_value_pdf_nvrtc_double.cpp ; +run test_extreme_value_pdf_nvrtc_float.cpp ; +run test_extreme_value_quan_nvrtc_double.cpp ; +run test_extreme_value_quan_nvrtc_float.cpp ; + +run test_fisher_f_cdf_nvrtc_double.cpp ; +run test_fisher_f_cdf_nvrtc_float.cpp ; +run test_fisher_f_pdf_nvrtc_double.cpp ; +run test_fisher_f_pdf_nvrtc_float.cpp ; +run test_fisher_f_quan_nvrtc_double.cpp ; +run test_fisher_f_quan_nvrtc_float.cpp ; + +run test_gamma_dist_cdf_nvrtc_double.cpp ; +run test_gamma_dist_cdf_nvrtc_float.cpp ; +run test_gamma_dist_pdf_nvrtc_double.cpp ; +run test_gamma_dist_pdf_nvrtc_float.cpp ; +run test_gamma_dist_quan_nvrtc_double.cpp ; +run test_gamma_dist_quan_nvrtc_float.cpp ; + +run test_geometric_dist_cdf_nvrtc_double.cpp ; +run test_geometric_dist_cdf_nvrtc_float.cpp ; +run test_geometric_dist_pdf_nvrtc_double.cpp ; +run test_geometric_dist_pdf_nvrtc_float.cpp ; +run test_geometric_dist_quan_nvrtc_double.cpp ; +run test_geometric_dist_quan_nvrtc_float.cpp ; + +run test_holtsmark_cdf_nvrtc_double.cpp ; +run test_holtsmark_cdf_nvrtc_float.cpp ; +run test_holtsmark_pdf_nvrtc_double.cpp ; +run test_holtsmark_pdf_nvrtc_float.cpp ; +run test_holtsmark_quan_nvrtc_double.cpp ; +run test_holtsmark_quan_nvrtc_float.cpp ; + +run test_inverse_chi_squared_cdf_nvrtc_double.cpp ; +run test_inverse_chi_squared_cdf_nvrtc_float.cpp ; +run test_inverse_chi_squared_pdf_nvrtc_double.cpp ; +run 
test_inverse_chi_squared_pdf_nvrtc_float.cpp ; +run test_inverse_chi_squared_quan_nvrtc_double.cpp ; +run test_inverse_chi_squared_quan_nvrtc_float.cpp ; + +run test_inverse_gamma_cdf_nvrtc_double.cpp ; +run test_inverse_gamma_cdf_nvrtc_float.cpp ; +run test_inverse_gamma_pdf_nvrtc_double.cpp ; +run test_inverse_gamma_pdf_nvrtc_float.cpp ; +run test_inverse_gamma_quan_nvrtc_double.cpp ; +run test_inverse_gamma_quan_nvrtc_float.cpp ; + +run test_inverse_gaussian_cdf_nvrtc_double.cpp ; +run test_inverse_gaussian_cdf_nvrtc_float.cpp ; +run test_inverse_gaussian_pdf_nvrtc_double.cpp ; +run test_inverse_gaussian_pdf_nvrtc_float.cpp ; +run test_inverse_gaussian_quan_nvrtc_double.cpp ; +run test_inverse_gaussian_quan_nvrtc_float.cpp ; + +run test_landau_cdf_nvrtc_double.cpp ; +run test_landau_cdf_nvrtc_float.cpp ; +run test_landau_pdf_nvrtc_double.cpp ; +run test_landau_pdf_nvrtc_float.cpp ; +run test_landau_quan_nvrtc_double.cpp ; +run test_landau_quan_nvrtc_float.cpp ; + +run test_laplace_cdf_nvrtc_double.cpp ; +run test_laplace_cdf_nvrtc_float.cpp ; +run test_laplace_pdf_nvrtc_double.cpp ; +run test_laplace_pdf_nvrtc_float.cpp ; +run test_laplace_quan_nvrtc_double.cpp ; +run test_laplace_quan_nvrtc_float.cpp ; + +run test_logistic_cdf_nvrtc_double.cpp ; +run test_logistic_cdf_nvrtc_float.cpp ; +run test_logistic_pdf_nvrtc_double.cpp ; +run test_logistic_pdf_nvrtc_float.cpp ; +run test_logistic_quan_nvrtc_double.cpp ; +run test_logistic_quan_nvrtc_float.cpp ; + +run test_lognormal_cdf_nvrtc_double.cpp ; +run test_lognormal_cdf_nvrtc_float.cpp ; +run test_lognormal_pdf_nvrtc_double.cpp ; +run test_lognormal_pdf_nvrtc_float.cpp ; +run test_lognormal_quan_nvrtc_double.cpp ; +run test_lognormal_quan_nvrtc_float.cpp ; + +run test_mapairy_cdf_nvrtc_double.cpp ; +run test_mapairy_cdf_nvrtc_float.cpp ; +run test_mapairy_pdf_nvrtc_double.cpp ; +run test_mapairy_pdf_nvrtc_float.cpp ; +run test_mapairy_quan_nvrtc_double.cpp ; +run test_mapairy_quan_nvrtc_float.cpp ; + +run test_nc_beta_cdf_nvrtc_double.cpp ; +run test_nc_beta_cdf_nvrtc_float.cpp ; +run test_nc_beta_pdf_nvrtc_double.cpp ; +run test_nc_beta_pdf_nvrtc_float.cpp ; +run test_nc_beta_quan_nvrtc_double.cpp ; +run test_nc_beta_quan_nvrtc_float.cpp ; + +run test_nc_chi_squared_cdf_nvrtc_double.cpp ; +run test_nc_chi_squared_cdf_nvrtc_float.cpp ; +run test_nc_chi_squared_pdf_nvrtc_double.cpp ; +run test_nc_chi_squared_pdf_nvrtc_float.cpp ; +run test_nc_chi_squared_quan_nvrtc_double.cpp ; +run test_nc_chi_squared_quan_nvrtc_float.cpp ; + +run test_nc_f_cdf_nvrtc_double.cpp ; +run test_nc_f_cdf_nvrtc_float.cpp ; +run test_nc_f_pdf_nvrtc_double.cpp ; +run test_nc_f_pdf_nvrtc_float.cpp ; +run test_nc_f_quan_nvrtc_double.cpp ; +run test_nc_f_quan_nvrtc_float.cpp ; + +run test_negative_binomial_cdf_nvrtc_double.cpp ; +run test_negative_binomial_cdf_nvrtc_float.cpp ; +run test_negative_binomial_pdf_nvrtc_double.cpp ; +run test_negative_binomial_pdf_nvrtc_float.cpp ; +run test_negative_binomial_quan_nvrtc_double.cpp ; +run test_negative_binomial_quan_nvrtc_float.cpp ; + +run test_normal_cdf_nvrtc_double.cpp ; +run test_normal_cdf_nvrtc_float.cpp ; +run test_normal_pdf_nvrtc_double.cpp ; +run test_normal_pdf_nvrtc_float.cpp ; +run test_normal_quan_nvrtc_double.cpp ; +run test_normal_quan_nvrtc_float.cpp ; + +run test_pareto_cdf_nvrtc_double.cpp ; +run test_pareto_cdf_nvrtc_float.cpp ; +run test_pareto_pdf_nvrtc_double.cpp ; +run test_pareto_pdf_nvrtc_float.cpp ; +run test_pareto_quan_nvrtc_double.cpp ; +run test_pareto_quan_nvrtc_float.cpp ; + +run 
test_poisson_cdf_nvrtc_double.cpp ; +run test_poisson_cdf_nvrtc_float.cpp ; +run test_poisson_pdf_nvrtc_double.cpp ; +run test_poisson_pdf_nvrtc_float.cpp ; +run test_poisson_quan_nvrtc_double.cpp ; +run test_poisson_quan_nvrtc_float.cpp ; + +run test_rayleigh_cdf_nvrtc_double.cpp ; +run test_rayleigh_cdf_nvrtc_float.cpp ; +run test_rayleigh_pdf_nvrtc_double.cpp ; +run test_rayleigh_pdf_nvrtc_float.cpp ; +run test_rayleigh_quan_nvrtc_double.cpp ; +run test_rayleigh_quan_nvrtc_float.cpp ; + +run test_saspoint5_cdf_nvrtc_double.cpp ; +run test_saspoint5_cdf_nvrtc_float.cpp ; +run test_saspoint5_pdf_nvrtc_double.cpp ; +run test_saspoint5_pdf_nvrtc_float.cpp ; +run test_saspoint5_quan_nvrtc_double.cpp ; +run test_saspoint5_quan_nvrtc_float.cpp ; + +run test_students_t_cdf_nvrtc_double.cpp ; +run test_students_t_cdf_nvrtc_float.cpp ; +run test_students_t_pdf_nvrtc_double.cpp ; +run test_students_t_pdf_nvrtc_float.cpp ; +run test_students_t_quan_nvrtc_double.cpp ; +run test_students_t_quan_nvrtc_float.cpp ; + +run test_triangular_cdf_nvrtc_double.cpp ; +run test_triangular_cdf_nvrtc_float.cpp ; +run test_triangular_pdf_nvrtc_double.cpp ; +run test_triangular_pdf_nvrtc_float.cpp ; +run test_triangular_quan_nvrtc_double.cpp ; +run test_triangular_quan_nvrtc_float.cpp ; + +run test_uniform_cdf_nvrtc_double.cpp ; +run test_uniform_cdf_nvrtc_float.cpp ; +run test_uniform_pdf_nvrtc_double.cpp ; +run test_uniform_pdf_nvrtc_float.cpp ; +run test_uniform_quan_nvrtc_double.cpp ; +run test_uniform_quan_nvrtc_float.cpp ; + +run test_weibull_cdf_nvrtc_double.cpp ; +run test_weibull_cdf_nvrtc_float.cpp ; +run test_weibull_pdf_nvrtc_double.cpp ; +run test_weibull_pdf_nvrtc_float.cpp ; +run test_weibull_quan_nvrtc_double.cpp ; +run test_weibull_quan_nvrtc_float.cpp ; + +# Special Functions +run test_airy_ai_nvrtc_double.cpp ; +run test_airy_ai_nvrtc_float.cpp ; +run test_airy_ai_prime_nvrtc_double.cpp ; +run test_airy_ai_prime_nvrtc_float.cpp ; +run test_airy_bi_nvrtc_double.cpp ; +run test_airy_bi_nvrtc_float.cpp ; +run test_airy_bi_prime_nvrtc_double.cpp ; +run test_airy_bi_prime_nvrtc_float.cpp ; + +run test_beta_nvrtc_double.cpp ; +run test_beta_nvrtc_float.cpp ; +run test_betac_nvrtc_double.cpp ; +run test_betac_nvrtc_float.cpp ; +run test_ibeta_nvrtc_double.cpp ; +run test_ibeta_nvrtc_float.cpp ; +run test_ibetac_nvrtc_double.cpp ; +run test_ibetac_nvrtc_float.cpp ; +run test_ibeta_derivative_nvrtc_double.cpp ; +run test_ibeta_derivative_nvrtc_float.cpp ; +run test_ibeta_inv_nvrtc_double.cpp ; +run test_ibeta_inv_nvrtc_float.cpp ; +run test_ibeta_inva_nvrtc_double.cpp ; +run test_ibeta_inva_nvrtc_float.cpp ; +run test_ibeta_invb_nvrtc_double.cpp ; +run test_ibeta_invb_nvrtc_float.cpp ; +run test_ibetac_inv_nvrtc_double.cpp ; +run test_ibetac_inv_nvrtc_float.cpp ; +run test_ibetac_inva_nvrtc_double.cpp ; +run test_ibetac_inva_nvrtc_float.cpp ; +run test_ibetac_invb_nvrtc_double.cpp ; +run test_ibetac_invb_nvrtc_float.cpp ; + +run test_bessel_i0_nvrtc_double.cpp ; +run test_bessel_i0_nvrtc_float.cpp ; +run test_bessel_i1_nvrtc_double.cpp ; +run test_bessel_i1_nvrtc_float.cpp ; +run test_bessel_j0_nvrtc_double.cpp ; +run test_bessel_j0_nvrtc_float.cpp ; +run test_bessel_j1_nvrtc_double.cpp ; +run test_bessel_j1_nvrtc_float.cpp ; +run test_bessel_k0_nvrtc_double.cpp ; +run test_bessel_k0_nvrtc_float.cpp ; +run test_bessel_k1_nvrtc_double.cpp ; +run test_bessel_k1_nvrtc_float.cpp ; +run test_bessel_kn_nvrtc_double.cpp ; +run test_bessel_kn_nvrtc_float.cpp ; +run test_bessel_y0_nvrtc_double.cpp ; +run 
test_bessel_y0_nvrtc_float.cpp ; +run test_bessel_y1_nvrtc_double.cpp ; +run test_bessel_y1_nvrtc_float.cpp ; +run test_cyl_bessel_i_nvrtc_double.cpp ; +run test_cyl_bessel_i_nvrtc_float.cpp ; +run test_cyl_bessel_j_nvrtc_double.cpp ; +run test_cyl_bessel_j_nvrtc_float.cpp ; +run test_cyl_bessel_k_nvrtc_double.cpp ; +run test_cyl_bessel_k_nvrtc_float.cpp ; +run test_sph_bessel_nvrtc_double.cpp ; +run test_sph_bessel_nvrtc_float.cpp ; +run test_cyl_neumann_nvrtc_double.cpp ; +run test_cyl_neumann_nvrtc_float.cpp ; +run test_sph_neumann_nvrtc_double.cpp ; +run test_sph_neumann_nvrtc_float.cpp ; +run test_cyl_hankel_1_nvrtc_double.cpp ; +run test_cyl_hankel_1_nvrtc_float.cpp ; +run test_cyl_hankel_2_nvrtc_double.cpp ; +run test_cyl_hankel_2_nvrtc_float.cpp ; +run test_sph_hankel_1_nvrtc_double.cpp ; +run test_sph_hankel_1_nvrtc_float.cpp ; +run test_sph_hankel_2_nvrtc_double.cpp ; +run test_sph_hankel_2_nvrtc_float.cpp ; + +run test_cbrt_nvrtc_double.cpp ; +run test_cbrt_nvrtc_float.cpp ; + +run test_cos_pi_nvrtc_double.cpp ; +run test_cos_pi_nvrtc_float.cpp ; + +run test_digamma_nvrtc_double.cpp ; +run test_digamma_nvrtc_float.cpp ; + +run test_erf_nvrtc_double.cpp ; +run test_erf_nvrtc_float.cpp ; +run test_erfc_nvrtc_double.cpp ; +run test_erfc_nvrtc_float.cpp ; +run test_erf_inv_nvrtc_double.cpp ; +run test_erf_inv_nvrtc_float.cpp ; +run test_erfc_inv_nvrtc_double.cpp ; +run test_erfc_inv_nvrtc_float.cpp ; + +run test_ellint_1_nvrtc_double.cpp ; +run test_ellint_1_nvrtc_float.cpp ; +run test_ellint_2_nvrtc_double.cpp ; +run test_ellint_2_nvrtc_float.cpp ; +run test_ellint_3_nvrtc_double.cpp ; +run test_ellint_3_nvrtc_float.cpp ; +run test_ellint_d_nvrtc_double.cpp ; +run test_ellint_d_nvrtc_float.cpp ; +run test_jacobi_zeta_nvrtc_double.cpp ; +run test_jacobi_zeta_nvrtc_float.cpp ; +run test_heumann_lambda_nvrtc_double.cpp ; +run test_heumann_lambda_nvrtc_float.cpp ; + +run test_expint_nvrtc_double.cpp ; +run test_expint_nvrtc_float.cpp ; + +run test_expm1_nvrtc_double.cpp ; +run test_expm1_nvrtc_float.cpp ; + +run test_fpclassify_nvrtc_double.cpp ; +run test_fpclassify_nvrtc_float.cpp ; + +run test_gamma_nvrtc_double.cpp ; +run test_gamma_nvrtc_float.cpp ; +run test_gamma_p_derivative_nvrtc_double.cpp ; +run test_gamma_p_derivative_nvrtc_float.cpp ; +run test_gamma_p_inv_nvrtc_double.cpp ; +run test_gamma_p_inv_nvrtc_float.cpp ; +run test_tgamma_ratio_nvrtc_double.cpp ; +run test_tgamma_ratio_nvrtc_float.cpp ; + +run test_gegenbauer_nvrtc_double.cpp ; +run test_gegenbauer_nvrtc_float.cpp ; + +run test_hermite_nvrtc_double.cpp ; +run test_hermite_nvrtc_float.cpp ; + +run test_log1p_nvrtc_double.cpp ; +run test_log1p_nvrtc_float.cpp ; + +run test_modf_nvrtc_double.cpp ; +run test_modf_nvrtc_float.cpp ; + +run test_round_nvrtc_double.cpp ; +run test_round_nvrtc_float.cpp ; + +run test_sign_nvrtc_double.cpp ; +run test_sign_nvrtc_float.cpp ; + +run test_sin_pi_nvrtc_double.cpp ; +run test_sin_pi_nvrtc_float.cpp ; + +run test_trigamma_nvrtc_double.cpp ; +run test_trigamma_nvrtc_float.cpp ; + +run test_trunc_nvrtc_double.cpp ; diff --git a/test/pow_test.cpp b/test/pow_test.cpp index ce3d036ab1..24a1cd7f79 100644 --- a/test/pow_test.cpp +++ b/test/pow_test.cpp @@ -2,6 +2,7 @@ // Tests the pow function // (C) Copyright Bruno Lalande 2008. +// (C) Copyright Matt Borland 2024. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE_1_0.txt or copy at
 // http://www.boost.org/LICENSE_1_0.txt)
 
@@ -11,13 +12,12 @@
 #include
 #include
-#include
+#include "../include_private/boost/math/tools/test.hpp"
 #define BOOST_TEST_MAIN
 #include
 #include
 #include
-#include
 #include
 #include
@@ -37,7 +37,9 @@ void test_pow(T base)
 {
     if ((base == 0) && N < 0)
     {
+        #ifndef BOOST_MATH_NO_EXCEPTIONS
         BOOST_MATH_CHECK_THROW(math::pow<N>(base), std::overflow_error);
+        #endif
     }
     else
     {
@@ -100,15 +102,15 @@ void test_with_big_exponents()
 void test_return_types()
 {
-    static_assert((is_same<decltype(math::pow<2>('\1')), double>::value), "Return type mismatch");
-    static_assert((is_same<decltype(math::pow<2>(L'\2')), double>::value), "Return type mismatch");
-    static_assert((is_same<decltype(math::pow<2>(3)), double>::value), "Return type mismatch");
-    static_assert((is_same<decltype(math::pow<2>(4u)), double>::value), "Return type mismatch");
-    static_assert((is_same<decltype(math::pow<2>(5ul)), double>::value), "Return type mismatch");
-    static_assert((is_same<decltype(math::pow<2>(6.0f)), float>::value), "Return type mismatch");
-    static_assert((is_same<decltype(math::pow<2>(7.0)), double>::value), "Return type mismatch");
+    static_assert((boost::math::is_same<decltype(math::pow<2>('\1')), double>::value), "Return type mismatch");
+    static_assert((boost::math::is_same<decltype(math::pow<2>(L'\2')), double>::value), "Return type mismatch");
+    static_assert((boost::math::is_same<decltype(math::pow<2>(3)), double>::value), "Return type mismatch");
+    static_assert((boost::math::is_same<decltype(math::pow<2>(4u)), double>::value), "Return type mismatch");
+    static_assert((boost::math::is_same<decltype(math::pow<2>(5ul)), double>::value), "Return type mismatch");
+    static_assert((boost::math::is_same<decltype(math::pow<2>(6.0f)), float>::value), "Return type mismatch");
+    static_assert((boost::math::is_same<decltype(math::pow<2>(7.0)), double>::value), "Return type mismatch");
 #ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
-    static_assert((is_same<decltype(math::pow<2>(7.0l)), long double>::value), "Return type mismatch");
+    static_assert((boost::math::is_same<decltype(math::pow<2>(7.0l)), long double>::value), "Return type mismatch");
 #endif
 }
diff --git a/test/stopwatch.hpp b/test/stopwatch.hpp
new file mode 100644
index 0000000000..9f3c60de80
--- /dev/null
+++ b/test/stopwatch.hpp
@@ -0,0 +1,39 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_MATH_CUDA_STOPWATCH_HPP
+#define BOOST_MATH_CUDA_STOPWATCH_HPP
+
+#ifdef _MSC_VER
+#pragma once
+#endif
+
+#include <chrono>
+
+template <class Clock>
+struct stopwatch
+{
+   typedef typename Clock::duration duration;
+   stopwatch()
+   {
+      m_start = Clock::now();
+   }
+   double elapsed()
+   {
+      duration t = Clock::now() - m_start;
+      return std::chrono::duration_cast<std::chrono::duration<double>>(t).count();
+   }
+   void reset()
+   {
+      m_start = Clock::now();
+   }
+
+private:
+   typename Clock::time_point m_start;
+};
+
+typedef stopwatch<std::chrono::high_resolution_clock> watch;
+
+#endif
diff --git a/test/sycl_jamfile b/test/sycl_jamfile
new file mode 100644
index 0000000000..582eaea407
--- /dev/null
+++ b/test/sycl_jamfile
@@ -0,0 +1,95 @@
+# Copyright 2024 Matt Borland
+# Distributed under the Boost Software License, Version 1.0.
+# https://www.boost.org/LICENSE_1_0.txt + +import testing ; +import ../../config/checks/config : requires ; + +project : requirements + [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ] + ; + +# Distributions +run test_arcsine.cpp ; +run test_bernoulli.cpp ; +run test_beta_dist.cpp ; +run test_cauchy.cpp ; +run test_chi_squared.cpp ; +run test_exponential_dist.cpp ; +run test_extreme_value.cpp ; +run test_fisher_f.cpp ; +run test_gamma_dist.cpp ; +run test_geometric.cpp ; +run test_holtsmark.cpp ; +run test_inverse_chi_squared_distribution.cpp ; +run test_inverse_gamma_distribution.cpp ; +run test_inverse_gaussian.cpp ; +run test_landau.cpp ; +run test_laplace.cpp ; +run test_logistic_dist.cpp ; +run test_lognormal.cpp ; +run test_mapairy.cpp ; +run test_nc_beta.cpp ; +run test_nc_chi_squared.cpp ; +run test_nc_f.cpp ; +run test_negative_binomial.cpp ; +run test_normal.cpp ; +run test_pareto.cpp ; +run test_poisson.cpp ; +run test_rayleigh.cpp ; +run test_saspoint5.cpp ; +run test_students_t.cpp ; +run test_triangular.cpp ; +run test_uniform.cpp ; +run test_weibull.cpp ; + +# Special Functions +run pow_test.cpp ; + +run test_airy.cpp ; + +run test_beta_simple.cpp ; +run test_beta.cpp ; +run test_ibeta.cpp ; +run test_ibeta_inv.cpp ; +run test_ibeta_inv_ab.cpp ; + +run test_bessel_i.cpp ; +run test_bessel_j.cpp ; +run test_bessel_k.cpp ; +run test_bessel_y.cpp ; + +run test_cbrt.cpp ; + +run test_ellint_1.cpp ; +run test_ellint_2.cpp ; +run test_ellint_d.cpp ; +run test_jacobi_zeta.cpp ; +run test_heuman_lambda.cpp ; + +run test_sign.cpp ; + +run test_round.cpp ; + +run test_expint.cpp ; + +run test_expm1_simple.cpp ; + +run gegenbauer_test.cpp ; + +run test_hankel.cpp ; + +run test_log1p_simple.cpp ; + +run test_digamma_simple.cpp ; + +run test_trigamma.cpp ; + +run test_erf.cpp ; + +run test_gamma.cpp ; +run test_igamma.cpp ; +run test_igamma_inv.cpp ; +run test_igamma_inva.cpp ; + +run test_hermite.cpp ; diff --git a/test/test_airy.cpp b/test/test_airy.cpp index d42fbb4ca3..335c5fd92c 100644 --- a/test/test_airy.cpp +++ b/test/test_airy.cpp @@ -3,14 +3,21 @@ // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef SYCL_LANGUAGE_VERSION #include +#endif #define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error #define BOOST_TEST_MAIN #include #include #include +#include + +#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS #include +#endif + #include #include #include @@ -48,8 +55,13 @@ void test_airy(T, const char* name) }}; T tol = boost::math::tools::epsilon() * 800; - if ((std::numeric_limits::digits > 100) || (std::numeric_limits::digits == 0)) + if (boost::math::tools::digits() > 100) tol *= 2; + + #ifdef SYCL_LANGUAGE_VERSION + tol *= 5; + #endif + for(unsigned i = 0; i < data.size(); ++i) { BOOST_CHECK_CLOSE_FRACTION(data[i][1], boost::math::airy_ai(data[i][0]), tol); diff --git a/test/test_airy_ai_double.cu b/test/test_airy_ai_double.cu new file mode 100644 index 0000000000..fad46bd9d5 --- /dev/null +++ b/test/test_airy_ai_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/airy.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::airy_ai(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::airy_ai(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
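Two invariants in the launch code above are worth making explicit: blocksPerGrid is the ceiling of numElements / threadsPerBlock, so the grid always covers every element, and the `if (i < numElements)` guard in the kernel discards the up to threadsPerBlock - 1 surplus threads of the last block. A small host-only check of that arithmetic (the helper name blocks_for is invented for illustration):

#include <cassert>

// Ceiling division, as used to size the CUDA grid.
constexpr int blocks_for(int n, int block)
{
    return (n + block - 1) / block;
}

int main()
{
    assert(blocks_for(50000, 256) == 196); // 196 * 256 = 50176 >= 50000
    assert(blocks_for(50176, 256) == 196); // exact multiple: no extra block
    assert(blocks_for(50177, 256) == 197); // one element past a multiple
    // Threads launched minus elements = surplus handled by the i < n guard:
    static_assert(blocks_for(50000, 256) * 256 - 50000 == 176, "surplus threads");
    return 0;
}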
diff --git a/test/test_airy_ai_float.cu b/test/test_airy_ai_float.cu
new file mode 100644
index 0000000000..b9149aec39
--- /dev/null
+++ b/test/test_airy_ai_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/airy.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::airy_ai(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::airy_ai(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_airy_ai_nvrtc_double.cpp b/test/test_airy_ai_nvrtc_double.cpp
new file mode 100644
index 0000000000..1b918cfef2
--- /dev/null
+++ b/test/test_airy_ai_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_airy_ai_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::airy_ai(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_airy_ai_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_airy_ai_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_airy_ai_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::airy_ai(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_airy_ai_nvrtc_float.cpp b/test/test_airy_ai_nvrtc_float.cpp new file mode 100644 index 0000000000..6957306426 --- /dev/null +++ b/test/test_airy_ai_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_airy_ai_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::airy_ai(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_airy_ai_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_airy_ai_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_airy_ai_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::airy_ai(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_airy_ai_prime_double.cu b/test/test_airy_ai_prime_double.cu new file mode 100644 index 0000000000..1a6bcd7104 --- /dev/null +++ b/test/test_airy_ai_prime_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/airy.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::airy_ai_prime(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::airy_ai_prime(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
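The verification loops above accept up to 10 "epsilons" of disagreement between the device and host results. boost::math::epsilon_difference(a, b) reports |a - b| scaled into multiples of machine epsilon at the operands' magnitude, so the check tolerates roughly ten representable steps. A minimal host-only illustration (values are an assumption-free consequence of the definition):

#include <iostream>
#include <limits>
#include <boost/math/special_functions/relative_difference.hpp>

int main()
{
    double a = 1.0;
    double b = a + 3 * std::numeric_limits<double>::epsilon();
    // Prints approximately 3: the values are three ULP-sized steps apart,
    // comfortably inside the "> 10" rejection threshold used by the tests.
    std::cout << boost::math::epsilon_difference(a, b) << '\n';
    return 0;
}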
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include <iostream> +#include <vector> +#include <cstdlib> +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::airy_ai_prime(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the test CUDA kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::airy_ai_prime(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_airy_ai_prime_nvrtc_double.cpp b/test/test_airy_ai_prime_nvrtc_double.cpp new file mode 100644 index 0000000000..1012571761 --- /dev/null +++ b/test/test_airy_ai_prime_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <nvrtc.h> +#include <cuda.h> +#include <cuda_runtime.h> + +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <iostream> +#include <random> +#include <exception> +#include <cmath> +#include <cstdlib> + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include <cuda/std/type_traits> +#include <boost/math/special_functions/airy.hpp> +extern "C" __global__ +void test_airy_ai_prime_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::airy_ai_prime(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_airy_ai_prime_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_airy_ai_prime_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_airy_ai_prime_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); +
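// The second input buffer is never read by this unary kernel; it is filled and copied anyway, presumably so all the NVRTC tests can share one (in1, in2, out, n) launch signature. +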
h_in2[i] = static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::airy_ai_prime(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_airy_ai_prime_nvrtc_float.cpp b/test/test_airy_ai_prime_nvrtc_float.cpp new file mode 100644 index 0000000000..c96e044497 --- /dev/null +++ b/test/test_airy_ai_prime_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <nvrtc.h> +#include <cuda.h> +#include <cuda_runtime.h> + +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <iostream> +#include <random> +#include <exception> +#include <cmath> +#include <cstdlib> + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include <cuda/std/type_traits> +#include <boost/math/special_functions/airy.hpp> +extern "C" __global__ +void test_airy_ai_prime_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::airy_ai_prime(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_airy_ai_prime_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_airy_ai_prime_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_airy_ai_prime_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); +
h_in2[i] = static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::airy_ai_prime(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_airy_bi_double.cu b/test/test_airy_bi_double.cu new file mode 100644 index 0000000000..60001a3fe5 --- /dev/null +++ b/test/test_airy_bi_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include <iostream> +#include <vector> +#include <cstdlib> +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::airy_bi(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the test CUDA kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::airy_bi(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_airy_bi_float.cu b/test/test_airy_bi_float.cu new file mode 100644 index 0000000000..ed729bfe78 --- /dev/null +++ b/test/test_airy_bi_float.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include <iostream> +#include <vector> +#include <cstdlib> +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::airy_bi(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the test CUDA kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::airy_bi(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_airy_bi_nvrtc_double.cpp b/test/test_airy_bi_nvrtc_double.cpp new file mode 100644 index 0000000000..f69e239163 --- /dev/null +++ b/test/test_airy_bi_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <nvrtc.h> +#include <cuda.h> +#include <cuda_runtime.h> + +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <iostream> +#include <random> +#include <exception> +#include <cmath> +#include <cstdlib> + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include <cuda/std/type_traits> +#include <boost/math/special_functions/airy.hpp> +extern "C" __global__ +void test_airy_bi_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::airy_bi(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_airy_bi_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_airy_bi_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_airy_bi_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); + h_in2[i] =
static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::airy_bi(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_airy_bi_nvrtc_float.cpp b/test/test_airy_bi_nvrtc_float.cpp new file mode 100644 index 0000000000..c28a5f5eb0 --- /dev/null +++ b/test/test_airy_bi_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <nvrtc.h> +#include <cuda.h> +#include <cuda_runtime.h> + +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <iostream> +#include <random> +#include <exception> +#include <cmath> +#include <cstdlib> + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include <cuda/std/type_traits> +#include <boost/math/special_functions/airy.hpp> +extern "C" __global__ +void test_airy_bi_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::airy_bi(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_airy_bi_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_airy_bi_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_airy_bi_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); +
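// A fixed seed (42 above) keeps the generated inputs, and therefore any reported mismatch, reproducible from run to run. +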
static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::airy_bi(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_airy_bi_prime_double.cu b/test/test_airy_bi_prime_double.cu new file mode 100644 index 0000000000..a73e43f254 --- /dev/null +++ b/test/test_airy_bi_prime_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include <iostream> +#include <vector> +#include <cstdlib> +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::airy_bi_prime(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the test CUDA kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::airy_bi_prime(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_airy_bi_prime_float.cu b/test/test_airy_bi_prime_float.cu new file mode 100644 index 0000000000..36874bccc7 --- /dev/null +++ b/test/test_airy_bi_prime_float.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include <iostream> +#include <vector> +#include <cstdlib> +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::airy_bi_prime(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the test CUDA kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::airy_bi_prime(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_airy_bi_prime_nvrtc_double.cpp b/test/test_airy_bi_prime_nvrtc_double.cpp new file mode 100644 index 0000000000..802f63a292 --- /dev/null +++ b/test/test_airy_bi_prime_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <nvrtc.h> +#include <cuda.h> +#include <cuda_runtime.h> + +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <iostream> +#include <random> +#include <exception> +#include <cmath> +#include <cstdlib> + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include <cuda/std/type_traits> +#include <boost/math/special_functions/airy.hpp> +extern "C" __global__ +void test_airy_bi_prime_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::airy_bi_prime(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_airy_bi_prime_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_airy_bi_prime_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_airy_bi_prime_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); +
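// Draws as large as 1000 push airy_bi_prime well into overflow; the verification loop below skips any reference value that is not finite. +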
h_in2[i] = static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::airy_bi_prime(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_airy_bi_prime_nvrtc_float.cpp b/test/test_airy_bi_prime_nvrtc_float.cpp new file mode 100644 index 0000000000..e96aa48b97 --- /dev/null +++ b/test/test_airy_bi_prime_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <nvrtc.h> +#include <cuda.h> +#include <cuda_runtime.h> + +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <iostream> +#include <random> +#include <exception> +#include <cmath> +#include <cstdlib> + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include <cuda/std/type_traits> +#include <boost/math/special_functions/airy.hpp> +extern "C" __global__ +void test_airy_bi_prime_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::airy_bi_prime(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_airy_bi_prime_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_airy_bi_prime_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_airy_bi_prime_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); +
h_in2[i] = static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::airy_bi_prime(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_arcsine.cpp b/test/test_arcsine.cpp index 0c2d847a96..d1ac803080 100644 --- a/test/test_arcsine.cpp +++ b/test/test_arcsine.cpp @@ -10,7 +10,9 @@ // Tests for the arcsine Distribution. +#ifndef SYCL_LANGUAGE_VERSION #include <pch_light.hpp> // Must be 1st include, and include_directory /libs/math/src/tr1/ is needed. +#endif #ifdef _MSC_VER # pragma warning(disable: 4127) // Conditional expression is constant. @@ -20,7 +22,6 @@ #include <boost/math/concepts/real_concept.hpp> // for real_concept. using ::boost::math::concepts::real_concept; -#include <boost/math/concepts/real_concept.hpp> // for real_concept. #include <boost/math/distributions/arcsine.hpp> // for arcsine_distribution. using boost::math::arcsine_distribution; @@ -42,6 +43,10 @@ using std::endl; #include <limits> using std::numeric_limits; +#if defined(BOOST_CHECK_THROW) && defined(BOOST_MATH_NO_EXCEPTIONS) +# undef BOOST_CHECK_THROW +# define BOOST_CHECK_THROW(x, y) +#endif template <class RealType> void test_ignore_policy(RealType) @@ -272,7 +277,7 @@ void test_spots(RealType) BOOST_CHECK_EQUAL(kurtosis_excess(arcsine_01), -1.5); // 3/2 BOOST_CHECK_EQUAL(support(arcsine_01).first, 0); // BOOST_CHECK_EQUAL(range(arcsine_01).first, 0); // - BOOST_MATH_CHECK_THROW(mode(arcsine_01), std::domain_error); // Two modes at x_min and x_max, so throw instead. + BOOST_CHECK_THROW(mode(arcsine_01), std::domain_error); // Two modes at x_min and x_max, so throw instead. // PDF // pdf of x = 1/4 is same as reflected value at x = 3/4. @@ -290,11 +295,13 @@ void test_spots(RealType) BOOST_CHECK_CLOSE_FRACTION(pdf(arcsine_01, 0.999999), static_cast<RealType>(318.31004533885312973989414360099118178698415543136L), 100000 * tolerance);// Even less accurate. // Extreme x.
+ #ifndef BOOST_MATH_ENABLE_SYCL if (std::numeric_limits<RealType>::has_infinity) { // BOOST_CHECK_EQUAL(pdf(arcsine_01, 0), informax()); // BOOST_CHECK_EQUAL(pdf(arcsine_01, 1), informax()); // } + #endif BOOST_CHECK_CLOSE_FRACTION(pdf(arcsine_01, tolerance), 1 /(sqrt(tolerance) * boost::math::constants::pi<RealType>()), 2 * tolerance); // @@ -439,56 +446,56 @@ void test_spots(RealType) BOOST_CHECK_CLOSE_FRACTION(quantile(complement(as_m2m1, static_cast<RealType>(0.85643370687129372924905811522494428117838480010259L))), -static_cast<RealType>(1.95L), 4 * tolerance); // Tests that should throw: - BOOST_MATH_CHECK_THROW(mode(arcsine_distribution<RealType>(static_cast<RealType>(0), static_cast<RealType>(1))), std::domain_error); + BOOST_CHECK_THROW(mode(arcsine_distribution<RealType>(static_cast<RealType>(0), static_cast<RealType>(1))), std::domain_error); // mode is undefined, and must throw domain_error! - BOOST_MATH_CHECK_THROW( // For various bad arguments. + BOOST_CHECK_THROW( // For various bad arguments. pdf( arcsine_distribution<RealType>(static_cast<RealType>(+1), static_cast<RealType>(-1)), // min_x > max_x static_cast<RealType>(1)), std::domain_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( pdf( arcsine_distribution<RealType>(static_cast<RealType>(1), static_cast<RealType>(0)), // bad constructor parameters. static_cast<RealType>(1)), std::domain_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( pdf( arcsine_distribution<RealType>(static_cast<RealType>(1), static_cast<RealType>(-1)), // bad constructor parameters. static_cast<RealType>(1)), std::domain_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( pdf( arcsine_distribution<RealType>(static_cast<RealType>(1), static_cast<RealType>(1)), // equal constructor parameters. static_cast<RealType>(-1)), std::domain_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( pdf( arcsine_distribution<RealType>(static_cast<RealType>(0), static_cast<RealType>(1)), // bad x > 1. static_cast<RealType>(999)), std::domain_error); - BOOST_MATH_CHECK_THROW( // For various bad arguments. + BOOST_CHECK_THROW( // For various bad arguments. logpdf( arcsine_distribution<RealType>(static_cast<RealType>(+1), static_cast<RealType>(-1)), // min_x > max_x static_cast<RealType>(1)), std::domain_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( logpdf( arcsine_distribution<RealType>(static_cast<RealType>(1), static_cast<RealType>(0)), // bad constructor parameters. static_cast<RealType>(1)), std::domain_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( logpdf( arcsine_distribution<RealType>(static_cast<RealType>(1), static_cast<RealType>(-1)), // bad constructor parameters. static_cast<RealType>(1)), std::domain_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( logpdf( arcsine_distribution<RealType>(static_cast<RealType>(1), static_cast<RealType>(1)), // equal constructor parameters. static_cast<RealType>(-1)), std::domain_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( logpdf( arcsine_distribution<RealType>(static_cast<RealType>(0), static_cast<RealType>(1)), // bad x > 1. static_cast<RealType>(999)), std::domain_error); @@ -496,26 +503,26 @@ void test_spots(RealType) // Checks on things that are errors. // Construction with 'bad' parameters. - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType>(+1, -1), std::domain_error); // max < min. - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType>(+1, 0), std::domain_error); // max < min. + BOOST_CHECK_THROW(arcsine_distribution<RealType>(+1, -1), std::domain_error); // max < min. + BOOST_CHECK_THROW(arcsine_distribution<RealType>(+1, 0), std::domain_error); // max < min.
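+ // A default-constructed arcsine_distribution<> has support [0, 1], so the negative arguments passed below are out of domain.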
arcsine_distribution<> dist; - BOOST_MATH_CHECK_THROW(pdf(dist, -1), std::domain_error); - BOOST_MATH_CHECK_THROW(logpdf(dist, -1), std::domain_error); - BOOST_MATH_CHECK_THROW(cdf(dist, -1), std::domain_error); - BOOST_MATH_CHECK_THROW(cdf(complement(dist, -1)), std::domain_error); - BOOST_MATH_CHECK_THROW(quantile(dist, -1), std::domain_error); - BOOST_MATH_CHECK_THROW(quantile(complement(dist, -1)), std::domain_error); - BOOST_MATH_CHECK_THROW(quantile(dist, -1), std::domain_error); - BOOST_MATH_CHECK_THROW(quantile(complement(dist, -1)), std::domain_error); + BOOST_CHECK_THROW(pdf(dist, -1), std::domain_error); + BOOST_CHECK_THROW(logpdf(dist, -1), std::domain_error); + BOOST_CHECK_THROW(cdf(dist, -1), std::domain_error); + BOOST_CHECK_THROW(cdf(complement(dist, -1)), std::domain_error); + BOOST_CHECK_THROW(quantile(dist, -1), std::domain_error); + BOOST_CHECK_THROW(quantile(complement(dist, -1)), std::domain_error); + BOOST_CHECK_THROW(quantile(dist, -1), std::domain_error); + BOOST_CHECK_THROW(quantile(complement(dist, -1)), std::domain_error); // Various combinations of bad constructor and member function parameters. - BOOST_MATH_CHECK_THROW(pdf(boost::math::arcsine_distribution<RealType>(0, 1), -1), std::domain_error); - BOOST_MATH_CHECK_THROW(pdf(boost::math::arcsine_distribution<RealType>(-1, 1), +2), std::domain_error); - BOOST_MATH_CHECK_THROW(logpdf(boost::math::arcsine_distribution<RealType>(0, 1), -1), std::domain_error); - BOOST_MATH_CHECK_THROW(logpdf(boost::math::arcsine_distribution<RealType>(-1, 1), +2), std::domain_error); - BOOST_MATH_CHECK_THROW(quantile(boost::math::arcsine_distribution<RealType>(1, 1), -1), std::domain_error); - BOOST_MATH_CHECK_THROW(quantile(boost::math::arcsine_distribution<RealType>(1, 1), 2), std::domain_error); + BOOST_CHECK_THROW(pdf(boost::math::arcsine_distribution<RealType>(0, 1), -1), std::domain_error); + BOOST_CHECK_THROW(pdf(boost::math::arcsine_distribution<RealType>(-1, 1), +2), std::domain_error); + BOOST_CHECK_THROW(logpdf(boost::math::arcsine_distribution<RealType>(0, 1), -1), std::domain_error); + BOOST_CHECK_THROW(logpdf(boost::math::arcsine_distribution<RealType>(-1, 1), +2), std::domain_error); + BOOST_CHECK_THROW(quantile(boost::math::arcsine_distribution<RealType>(1, 1), -1), std::domain_error); + BOOST_CHECK_THROW(quantile(boost::math::arcsine_distribution<RealType>(1, 1), 2), std::domain_error); // No longer allow any parameter to be NaN or inf, so all these tests should throw. if (std::numeric_limits<RealType>::has_quiet_NaN) @@ -523,23 +530,23 @@ // Attempt to construct from non-finite parameters should throw.
RealType nan = std::numeric_limits<RealType>::quiet_NaN(); #ifndef BOOST_NO_EXCEPTIONS - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType> w(nan), std::domain_error); - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType> w(1, nan), std::domain_error); - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType> w(nan, 1), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution<RealType> w(nan), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution<RealType> w(1, nan), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution<RealType> w(nan, 1), std::domain_error); #else - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType>(nan), std::domain_error); - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType>(1, nan), std::domain_error); - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType>(nan, 1), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution<RealType>(nan), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution<RealType>(1, nan), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution<RealType>(nan, 1), std::domain_error); #endif arcsine_distribution<RealType> w(RealType(-1), RealType(+1)); // NaN parameters to member functions should throw. - BOOST_MATH_CHECK_THROW(pdf(w, +nan), std::domain_error); // x = NaN - BOOST_MATH_CHECK_THROW(logpdf(w, +nan), std::domain_error); // x = NaN - BOOST_MATH_CHECK_THROW(cdf(w, +nan), std::domain_error); // x = NaN - BOOST_MATH_CHECK_THROW(cdf(complement(w, +nan)), std::domain_error); // x = + nan - BOOST_MATH_CHECK_THROW(quantile(w, +nan), std::domain_error); // p = + nan - BOOST_MATH_CHECK_THROW(quantile(complement(w, +nan)), std::domain_error); // p = + nan + BOOST_CHECK_THROW(pdf(w, +nan), std::domain_error); // x = NaN + BOOST_CHECK_THROW(logpdf(w, +nan), std::domain_error); // x = NaN + BOOST_CHECK_THROW(cdf(w, +nan), std::domain_error); // x = NaN + BOOST_CHECK_THROW(cdf(complement(w, +nan)), std::domain_error); // x = + nan + BOOST_CHECK_THROW(quantile(w, +nan), std::domain_error); // p = + nan + BOOST_CHECK_THROW(quantile(complement(w, +nan)), std::domain_error); // p = + nan } // has_quiet_NaN if (std::numeric_limits<RealType>::has_infinity) @@ -547,27 +554,27 @@ // Attempt to construct from non-finite should throw. RealType inf = std::numeric_limits<RealType>::infinity(); #ifndef BOOST_NO_EXCEPTIONS - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType> w(inf), std::domain_error); - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType> w(1, inf), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution<RealType> w(inf), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution<RealType> w(1, inf), std::domain_error); #else - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType>(inf), std::domain_error); - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType>(1, inf), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution<RealType>(inf), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution<RealType>(1, inf), std::domain_error); #endif // Infinite parameters to member functions should throw.
arcsine_distribution w(RealType(0), RealType(1)); #ifndef BOOST_NO_EXCEPTIONS - BOOST_MATH_CHECK_THROW(arcsine_distribution w(inf), std::domain_error); - BOOST_MATH_CHECK_THROW(arcsine_distribution w(1, inf), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution w(inf), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution w(1, inf), std::domain_error); #else - BOOST_MATH_CHECK_THROW(arcsine_distribution(inf), std::domain_error); - BOOST_MATH_CHECK_THROW(arcsine_distribution(1, inf), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution(inf), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution(1, inf), std::domain_error); #endif - BOOST_MATH_CHECK_THROW(pdf(w, +inf), std::domain_error); // x = inf - BOOST_MATH_CHECK_THROW(logpdf(w, +inf), std::domain_error); // x = inf - BOOST_MATH_CHECK_THROW(cdf(w, +inf), std::domain_error); // x = inf - BOOST_MATH_CHECK_THROW(cdf(complement(w, +inf)), std::domain_error); // x = + inf - BOOST_MATH_CHECK_THROW(quantile(w, +inf), std::domain_error); // p = + inf - BOOST_MATH_CHECK_THROW(quantile(complement(w, +inf)), std::domain_error); // p = + inf + BOOST_CHECK_THROW(pdf(w, +inf), std::domain_error); // x = inf + BOOST_CHECK_THROW(logpdf(w, +inf), std::domain_error); // x = inf + BOOST_CHECK_THROW(cdf(w, +inf), std::domain_error); // x = inf + BOOST_CHECK_THROW(cdf(complement(w, +inf)), std::domain_error); // x = + inf + BOOST_CHECK_THROW(quantile(w, +inf), std::domain_error); // p = + inf + BOOST_CHECK_THROW(quantile(complement(w, +inf)), std::domain_error); // p = + inf } // has_infinity // Error handling checks: @@ -601,7 +608,7 @@ void test_spots(RealType) BOOST_CHECK_EQUAL(kurtosis_excess(as), -1.5); // 3/2 BOOST_CHECK_EQUAL(support(as).first, 0); // BOOST_CHECK_EQUAL(range(as).first, 0); // - BOOST_MATH_CHECK_THROW(mode(as), std::domain_error); // Two modes at x_min and x_max, so throw instead. + BOOST_CHECK_THROW(mode(as), std::domain_error); // Two modes at x_min and x_max, so throw instead. // (Parameter value, arbitrarily zero, only communicates the floating point type). test_spots(0.0F); // Test float. diff --git a/test/test_arcsine_cdf_double.cu b/test/test_arcsine_cdf_double.cu new file mode 100644 index 0000000000..3ac9e22cd0 --- /dev/null +++ b/test/test_arcsine_cdf_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::arcsine_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::arcsine_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_arcsine_cdf_float.cu b/test/test_arcsine_cdf_float.cu new file mode 100644 index 0000000000..cc73ce95bd --- /dev/null +++ b/test/test_arcsine_cdf_float.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::arcsine_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::arcsine_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_arcsine_cdf_nvrtc_double.cpp b/test/test_arcsine_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..58b6b8297f --- /dev/null +++ b/test/test_arcsine_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::arcsine_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::arcsine_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_arcsine_cdf_nvrtc_float.cpp b/test/test_arcsine_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..3f8b04bb0b --- /dev/null +++ b/test/test_arcsine_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::arcsine_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] 
= static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::arcsine_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_arcsine_pdf_double.cu b/test/test_arcsine_pdf_double.cu new file mode 100644 index 0000000000..8f45017ba8 --- /dev/null +++ b/test/test_arcsine_pdf_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::arcsine_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::arcsine_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_arcsine_pdf_float.cu b/test/test_arcsine_pdf_float.cu new file mode 100644 index 0000000000..c236b7876f --- /dev/null +++ b/test/test_arcsine_pdf_float.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::arcsine_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::arcsine_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_arcsine_pdf_nvrtc_double.cpp b/test/test_arcsine_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..c76b47f883 --- /dev/null +++ b/test/test_arcsine_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::arcsine_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::arcsine_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_arcsine_pdf_nvrtc_float.cpp b/test/test_arcsine_pdf_nvrtc_float.cpp new file mode 100644 index 0000000000..e9145a1624 --- /dev/null +++ b/test/test_arcsine_pdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::arcsine_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] 
= static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::arcsine_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_arcsine_quan_double.cu b/test/test_arcsine_quan_double.cu new file mode 100644 index 0000000000..a457370635 --- /dev/null +++ b/test/test_arcsine_quan_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::arcsine_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::arcsine_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_arcsine_quan_float.cu b/test/test_arcsine_quan_float.cu new file mode 100644 index 0000000000..fd8cd11fcc --- /dev/null +++ b/test/test_arcsine_quan_float.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::arcsine_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::arcsine_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_arcsine_quantile_nvrtc_double.cpp b/test/test_arcsine_quantile_nvrtc_double.cpp new file mode 100644 index 0000000000..ba8e2e5df5 --- /dev/null +++ b/test/test_arcsine_quantile_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::arcsine_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::arcsine_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_arcsine_quantile_nvrtc_float.cpp b/test/test_arcsine_quantile_nvrtc_float.cpp new file mode 100644 index 0000000000..1fd2e4884a --- /dev/null +++ b/test/test_arcsine_quantile_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::arcsine_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::arcsine_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_arcsine_range_support_double.cu b/test/test_arcsine_range_support_double.cu new file mode 100644 index 0000000000..b3fb575faa --- /dev/null +++ b/test/test_arcsine_range_support_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include <iostream> +#include <vector> +#include <exception> +#include <boost/math/distributions/arcsine.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/random/mersenne_twister.hpp> +#include <boost/random/uniform_real_distribution.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type* in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = range(boost::math::arcsine_distribution<float_type>(in1[i])).first + support(boost::math::arcsine_distribution<float_type>(in1[i])).second; + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution<float_type> dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(range(boost::math::arcsine_distribution<float_type>(input_vector1[i])).first + support(boost::math::arcsine_distribution<float_type>(input_vector1[i])).second); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_arcsine_range_support_float.cu b/test/test_arcsine_range_support_float.cu new file mode 100644 index 0000000000..d207d0598e --- /dev/null +++ b/test/test_arcsine_range_support_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include <iostream> +#include <vector> +#include <exception> +#include <boost/math/distributions/arcsine.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/random/mersenne_twister.hpp> +#include <boost/random/uniform_real_distribution.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type* in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = range(boost::math::arcsine_distribution<float_type>(in1[i])).first + support(boost::math::arcsine_distribution<float_type>(in1[i])).second; + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution<float_type> dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(range(boost::math::arcsine_distribution<float_type>(input_vector1[i])).first + support(boost::math::arcsine_distribution<float_type>(input_vector1[i])).second); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_autodiff_2.cpp b/test/test_autodiff_2.cpp index 79ee24ba0d..7315e7dc1b 100644 --- a/test/test_autodiff_2.cpp +++ b/test/test_autodiff_2.cpp @@ -522,6 +522,17 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(asinh_test, T, bin_float_types) { -39 / (16 * boost::math::constants::root_two<T>()), eps); } +template <typename T> +static T atan2_wrap(T x, T y) +{ + return atan2(x, y); +} + +static long double atan2_wrap(long double x, long double y) +{ + return std::atan2(x, y); +} + BOOST_AUTO_TEST_CASE_TEMPLATE(atan2_function, T, all_float_types) { using test_constants = test_constants_t<T>; using std::atan2; @@ -536,7 +547,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(atan2_function, T, all_float_types) { auto y = y_sampler.next(); auto autodiff_v = atan2(make_fvar<T, m>(x), make_fvar<T, m>(y)); - auto anchor_v = atan2(x, y); + auto anchor_v = atan2_wrap(x, y); BOOST_CHECK_CLOSE(autodiff_v, anchor_v, 5000 * test_constants::pct_epsilon()); } diff --git a/test/test_bernoulli.cpp b/test/test_bernoulli.cpp index d8c663399a..8513cec36d 100644 --- a/test/test_bernoulli.cpp +++ b/test/test_bernoulli.cpp @@ -2,6 +2,7 @@ // Copyright John Maddock 2006. // Copyright Paul A. Bristow 2007, 2012. +// Copyright Matt Borland 2024 // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. @@ -22,7 +23,7 @@ #include <boost/math/concepts/real_concept.hpp> // for real_concept using ::boost::math::concepts::real_concept; -#include <boost/math/tools/test.hpp> +#include "../include_private/boost/math/tools/test.hpp" #include <boost/math/distributions/bernoulli.hpp> // for bernoulli_distribution using boost::math::bernoulli_distribution; @@ -74,6 +75,7 @@ void test_spots(RealType) BOOST_CHECK_EQUAL(bernoulli_distribution<RealType>(static_cast<RealType>(0.1L)).success_fraction(), static_cast<RealType>(0.1L)); BOOST_CHECK_EQUAL(bernoulli_distribution<RealType>(static_cast<RealType>(0.9L)).success_fraction(), static_cast<RealType>(0.9L)); +#ifndef BOOST_MATH_NO_EXCEPTIONS BOOST_MATH_CHECK_THROW( // Constructor success_fraction outside 0 to 1. bernoulli_distribution<RealType>(static_cast<RealType>(2)), std::domain_error); BOOST_MATH_CHECK_THROW( @@ -86,7 +88,8 @@ void test_spots(RealType) BOOST_MATH_CHECK_THROW( pdf( // pdf k neither 0 nor 1. bernoulli_distribution<RealType>(static_cast<RealType>(0.25L)), static_cast<RealType>(2)), std::domain_error); - +#endif + BOOST_CHECK_EQUAL( pdf( // OK k (or n) bernoulli_distribution<RealType>(static_cast<RealType>(0.5L)), static_cast<RealType>(0)), @@ -134,6 +137,7 @@ void test_spots(RealType) static_cast<RealType>(5.11111111111111111111111111111111111111111111L), tolerance); +#ifndef BOOST_MATH_NO_EXCEPTIONS BOOST_MATH_CHECK_THROW( quantile( bernoulli_distribution<RealType>(static_cast<RealType>(2)), // prob >1 @@ -154,6 +158,7 @@ bernoulli_distribution<RealType>(static_cast<RealType>(0.5L)), // k < 0 static_cast<RealType>(2)), std::domain_error ); +#endif BOOST_CHECK_CLOSE_FRACTION( cdf( @@ -217,6 +222,7 @@ // Checks for 'bad' parameters. // Construction. + #ifndef BOOST_MATH_NO_EXCEPTIONS BOOST_MATH_CHECK_THROW(bernoulli_distribution<RealType>(-1), std::domain_error); // p outside 0 to 1. BOOST_MATH_CHECK_THROW(bernoulli_distribution<RealType>(+2), std::domain_error); // p outside 0 to 1. 
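+   // The throw-based checks above are compiled out when BOOST_MATH_NO_EXCEPTIONS is defined (e.g. for device or freestanding builds).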
@@ -269,7 +275,7 @@ void test_spots(RealType) BOOST_MATH_CHECK_THROW(quantile(w, +inf), std::domain_error); // p = + inf BOOST_MATH_CHECK_THROW(quantile(complement(w, +inf)), std::domain_error); // p = + inf } // has_infinity - + #endif } // template <class RealType> void test_spots(RealType) BOOST_AUTO_TEST_CASE( test_main ) @@ -302,7 +308,9 @@ BOOST_AUTO_TEST_CASE( test_main ) // (Parameter value, arbitrarily zero, only communicates the floating point type). test_spots(0.0F); // Test float. test_spots(0.0); // Test double. +#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS test_spots(0.0L); // Test long double. +#endif #if !BOOST_WORKAROUND(BOOST_BORLANDC, BOOST_TESTED_AT(0x582)) && !defined(BOOST_MATH_NO_REAL_CONCEPT_TESTS) test_spots(boost::math::concepts::real_concept(0.)); // Test real concept. #endif diff --git a/test/test_bernoulli_cdf_double.cu b/test/test_bernoulli_cdf_double.cu new file mode 100644 index 0000000000..1a6dce645e --- /dev/null +++ b/test/test_bernoulli_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include <iostream> +#include <vector> +#include <exception> +#include <boost/math/distributions/bernoulli.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/random/mersenne_twister.hpp> +#include <boost/random/uniform_real_distribution.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::bernoulli_distribution<float_type>(in1[i]), static_cast<float_type>(1)); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution<float_type> dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::bernoulli_distribution<float_type>(input_vector1[i]), static_cast<float_type>(1))); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_bernoulli_cdf_float.cu b/test/test_bernoulli_cdf_float.cu new file mode 100644 index 0000000000..998f247361 --- /dev/null +++ b/test/test_bernoulli_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include <iostream> +#include <vector> +#include <exception> +#include <boost/math/distributions/bernoulli.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/random/mersenne_twister.hpp> +#include <boost/random/uniform_real_distribution.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::bernoulli_distribution<float_type>(in1[i]), static_cast<float_type>(1)); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution<float_type> dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::bernoulli_distribution<float_type>(input_vector1[i]), static_cast<float_type>(1))); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_bernoulli_cdf_nvrtc_double.cpp b/test/test_bernoulli_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..887c3430a0 --- /dev/null +++ b/test/test_bernoulli_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <nvrtc.h> +#include <cuda.h> +#include <cuda_runtime.h> + +#include <boost/math/distributions/bernoulli.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/math/special_functions/fpclassify.hpp> +#include <iostream> +#include <cstdlib> +#include <cmath> +#include <random> +#include <exception> + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include <boost/math/distributions/bernoulli.hpp> +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::bernoulli_distribution<float_type>(), round(in1[i])); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", 
"--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::bernoulli_distribution(), round(h_in1[i])); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bernoulli_cdf_nvrtc_float.cpp b/test/test_bernoulli_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..1a38c43480 --- /dev/null +++ b/test/test_bernoulli_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. 
+// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <nvrtc.h> +#include <cuda.h> +#include <cuda_runtime.h> + +#include <boost/math/distributions/bernoulli.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/math/special_functions/fpclassify.hpp> +#include <iostream> +#include <cstdlib> +#include <cmath> +#include <random> +#include <exception> + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include <boost/math/distributions/bernoulli.hpp> +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::bernoulli_distribution<float_type>(), round(in1[i])); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + 
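+        // A fixed seed keeps the pseudo-random inputs identical from run to run, so host and device results stay comparable.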
std::uniform_real_distribution<float_type> dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); + h_in2[i] = static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::bernoulli_distribution<float_type>(), round(h_in1[i])); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n  Serial: " << res + << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." 
<< std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bernoulli_constants.cpp b/test/test_bernoulli_constants.cpp index 6d73d82a4c..0c5c81b62f 100644 --- a/test/test_bernoulli_constants.cpp +++ b/test/test_bernoulli_constants.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include "table_type.hpp" #include #include #include @@ -22,114 +22,114 @@ void test(const char* name) { std::cout << "Testing type " << name << ":\n"; - static const typename table_type::type data[] = + static const typename table_type::type data[] = { /* First 50 from 2 to 100 inclusive: */ /* TABLE[N[BernoulliB[n], 200], {n,2,100,2}] */ - SC_(0.1666666666666666666666666666666666666666), - SC_(-0.0333333333333333333333333333333333333333), - SC_(0.0238095238095238095238095238095238095238), - SC_(-0.0333333333333333333333333333333333333333), - SC_(0.0757575757575757575757575757575757575757), - SC_(-0.2531135531135531135531135531135531135531), - SC_(1.1666666666666666666666666666666666666666), - SC_(-7.0921568627450980392156862745098039215686), - SC_(54.9711779448621553884711779448621553884711), - SC_(-529.1242424242424242424242424242424242424242), - SC_(6192.1231884057971014492753623188405797101449), - SC_(-86580.2531135531135531135531135531135531135531), - SC_(1.4255171666666666666666666666666666666666e6), - SC_(-2.7298231067816091954022988505747126436781e7), - SC_(6.0158087390064236838430386817483591677140e8), - SC_(-1.5116315767092156862745098039215686274509e10), - SC_(4.2961464306116666666666666666666666666666e11), - SC_(-1.3711655205088332772159087948561632772159e13), - SC_(4.8833231897359316666666666666666666666666e14), - SC_(-1.9296579341940068148632668144863266814486e16), - SC_(8.4169304757368261500055370985603543743078e17), - SC_(-4.0338071854059455413076811594202898550724e19), - SC_(2.1150748638081991605601453900709219858156e21), - SC_(-1.2086626522296525934602731193708252531781e23), - SC_(7.5008667460769643668557200757575757575757e24), - SC_(-5.0387781014810689141378930305220125786163e26), - SC_(3.6528776484818123335110430842971177944862e28), - SC_(-2.8498769302450882226269146432910678160919e30), - SC_(2.3865427499683627644645981919219214971751e32), - SC_(-2.1399949257225333665810744765191097392674e34), - SC_(2.0500975723478097569921733095672310251666e36), - SC_(-2.0938005911346378409095185290027970184709e38), - SC_(2.2752696488463515559649260352769264581469e40), - SC_(-2.6257710286239576047303049736158202081449e42), - SC_(3.2125082102718032518204792304264985243521e44), - SC_(-4.1598278166794710913917074495262358936689e46), - SC_(5.6920695482035280023883456219121058644480e48), - SC_(-8.2183629419784575692290653468617333014550e50), - SC_(1.2502904327166993016732339829702895524177e53), - SC_(-2.0015583233248370274925329198813298768724e55), - SC_(3.3674982915364374233396676903338753016219e57), - SC_(-5.9470970503135447718660496844051540840579e59), - SC_(1.1011910323627977559564130790437691604630e62), - SC_(-2.1355259545253501188658385019041065678973e64), - SC_(4.3328896986641192419616613059379206218451e66), - SC_(-9.1885528241669328226200555215501897138960e68), - SC_(2.0346896776329074493455027990220020065975e71), - SC_(-4.7003833958035731078575255535006060654596e73), - SC_(1.1318043445484249270675186257733934267890e76), + SC_(0.1666666666666666666666666666666666666666), + SC_(-0.0333333333333333333333333333333333333333), + 
SC_(0.0238095238095238095238095238095238095238), + SC_(-0.0333333333333333333333333333333333333333), + SC_(0.0757575757575757575757575757575757575757), + SC_(-0.2531135531135531135531135531135531135531), + SC_(1.1666666666666666666666666666666666666666), + SC_(-7.0921568627450980392156862745098039215686), + SC_(54.9711779448621553884711779448621553884711), + SC_(-529.1242424242424242424242424242424242424242), + SC_(6192.1231884057971014492753623188405797101449), + SC_(-86580.2531135531135531135531135531135531135531), + SC_(1.4255171666666666666666666666666666666666e6), + SC_(-2.7298231067816091954022988505747126436781e7), + SC_(6.0158087390064236838430386817483591677140e8), + SC_(-1.5116315767092156862745098039215686274509e10), + SC_(4.2961464306116666666666666666666666666666e11), + SC_(-1.3711655205088332772159087948561632772159e13), + SC_(4.8833231897359316666666666666666666666666e14), + SC_(-1.9296579341940068148632668144863266814486e16), + SC_(8.4169304757368261500055370985603543743078e17), + SC_(-4.0338071854059455413076811594202898550724e19), + SC_(2.1150748638081991605601453900709219858156e21), + SC_(-1.2086626522296525934602731193708252531781e23), + SC_(7.5008667460769643668557200757575757575757e24), + SC_(-5.0387781014810689141378930305220125786163e26), + SC_(3.6528776484818123335110430842971177944862e28), + SC_(-2.8498769302450882226269146432910678160919e30), + SC_(2.3865427499683627644645981919219214971751e32), + SC_(-2.1399949257225333665810744765191097392674e34), + SC_(2.0500975723478097569921733095672310251666e36), + SC_(-2.0938005911346378409095185290027970184709e38), + SC_(2.2752696488463515559649260352769264581469e40), + SC_(-2.6257710286239576047303049736158202081449e42), + SC_(3.2125082102718032518204792304264985243521e44), + SC_(-4.1598278166794710913917074495262358936689e46), + SC_(5.6920695482035280023883456219121058644480e48), + SC_(-8.2183629419784575692290653468617333014550e50), + SC_(1.2502904327166993016732339829702895524177e53), + SC_(-2.0015583233248370274925329198813298768724e55), + SC_(3.3674982915364374233396676903338753016219e57), + SC_(-5.9470970503135447718660496844051540840579e59), + SC_(1.1011910323627977559564130790437691604630e62), + SC_(-2.1355259545253501188658385019041065678973e64), + SC_(4.3328896986641192419616613059379206218451e66), + SC_(-9.1885528241669328226200555215501897138960e68), + SC_(2.0346896776329074493455027990220020065975e71), + SC_(-4.7003833958035731078575255535006060654596e73), + SC_(1.1318043445484249270675186257733934267890e76), SC_(-2.8382249570693706959264156336481764738284e78), /* next 50 from 102 to 200: */ /* TABLE[N[BernoulliB[n], 200], {n,102,200,2}] */ - SC_(7.4064248979678850629750827140920984176879e80), - SC_(-2.0096454802756604483465619672715363186867e83), - SC_(5.6657170050805941445719346030519356961419e85), - SC_(-1.6584511154136216915823713374319912301494e88), - SC_(5.0368859950492377419289421915180154812442e90), - SC_(-1.5861468237658186369363401572966438782740e93), - SC_(5.1756743617545626984073240682507122561240e95), - SC_(-1.7488921840217117339690025877618159145141e98), - SC_(6.1160519994952185255824525264264167780767e100), - SC_(-2.2122776912707834942288323456712932445573e103), - SC_(8.2722776798770969854221062459984595731204e105), - SC_(-3.1958925111415709583591634369180814873526e108), - SC_(1.2750082223387792982310024302926679866957e111), - SC_(-5.2500923086774133899402824624565175446919e113), - SC_(2.2301817894241625209869298198838728143738e116), - SC_(-9.7684521930955204438633513398980239301166e118), - 
SC_(4.4098361978452954272272622874813169191875e121), - SC_(-2.0508570886464088839729337727583015486456e124), - SC_(9.8214433279791277107572969602097521041491e126), - SC_(-4.8412600798208880508789196709963412761130e129), - SC_(2.4553088801480982609783467404088690399673e132), - SC_(-1.2806926804084747548782513278601785721811e135), - SC_(6.8676167104668581192101888598464400436092e137), - SC_(-3.7846468581969104694978995416379556814489e140), - SC_(2.1426101250665291550871323135148272096660e143), - SC_(-1.2456727137183695007019642961637607219458e146), - SC_(7.4345787551000152543679668394052061311780e148), - SC_(-4.5535795304641704894063333223321274876772e151), - SC_(2.8612112816858868345363847251017232522918e154), - SC_(-1.8437723552033869727688202653628785487541e157), - SC_(1.2181154536221046699501316506599521355817e160), - SC_(-8.2482187185314121548481845729689344730141e162), - SC_(5.7225877937832943329651649814297861591868e165), - SC_(-4.0668530525059104726767969383115865560219e168), - SC_(2.9596092064642050062875269581585187042637e171), - SC_(-2.2049522565189457509031175227344598483637e174), - SC_(1.6812597072889599805831152515136066575446e177), - SC_(-1.3116736213556957648645280635581715300443e180), - SC_(1.0467894009478038082183285392982308964382e183), - SC_(-8.5432893578833707718598254629908277459327e185), - SC_(7.1287821322486542352288406677143822472124e188), - SC_(-6.0802931455535899300084711868647745846198e191), - SC_(5.2996776424849923930094291004324726622848e194), - SC_(-4.7194259168745862644364622901337991110376e197), - SC_(4.2928413791402981089416829654107466904552e200), - SC_(-3.9876744968232207443447765554293879510665e203), - SC_(3.7819780419358882713894418116139332789822e206), - SC_(-3.6614233683681191243685808215119734875519e209), - SC_(3.6176090272372862348855460929891408947754e212), + SC_(7.4064248979678850629750827140920984176879e80), + SC_(-2.0096454802756604483465619672715363186867e83), + SC_(5.6657170050805941445719346030519356961419e85), + SC_(-1.6584511154136216915823713374319912301494e88), + SC_(5.0368859950492377419289421915180154812442e90), + SC_(-1.5861468237658186369363401572966438782740e93), + SC_(5.1756743617545626984073240682507122561240e95), + SC_(-1.7488921840217117339690025877618159145141e98), + SC_(6.1160519994952185255824525264264167780767e100), + SC_(-2.2122776912707834942288323456712932445573e103), + SC_(8.2722776798770969854221062459984595731204e105), + SC_(-3.1958925111415709583591634369180814873526e108), + SC_(1.2750082223387792982310024302926679866957e111), + SC_(-5.2500923086774133899402824624565175446919e113), + SC_(2.2301817894241625209869298198838728143738e116), + SC_(-9.7684521930955204438633513398980239301166e118), + SC_(4.4098361978452954272272622874813169191875e121), + SC_(-2.0508570886464088839729337727583015486456e124), + SC_(9.8214433279791277107572969602097521041491e126), + SC_(-4.8412600798208880508789196709963412761130e129), + SC_(2.4553088801480982609783467404088690399673e132), + SC_(-1.2806926804084747548782513278601785721811e135), + SC_(6.8676167104668581192101888598464400436092e137), + SC_(-3.7846468581969104694978995416379556814489e140), + SC_(2.1426101250665291550871323135148272096660e143), + SC_(-1.2456727137183695007019642961637607219458e146), + SC_(7.4345787551000152543679668394052061311780e148), + SC_(-4.5535795304641704894063333223321274876772e151), + SC_(2.8612112816858868345363847251017232522918e154), + SC_(-1.8437723552033869727688202653628785487541e157), + SC_(1.2181154536221046699501316506599521355817e160), + 
SC_(-8.2482187185314121548481845729689344730141e162), + SC_(5.7225877937832943329651649814297861591868e165), + SC_(-4.0668530525059104726767969383115865560219e168), + SC_(2.9596092064642050062875269581585187042637e171), + SC_(-2.2049522565189457509031175227344598483637e174), + SC_(1.6812597072889599805831152515136066575446e177), + SC_(-1.3116736213556957648645280635581715300443e180), + SC_(1.0467894009478038082183285392982308964382e183), + SC_(-8.5432893578833707718598254629908277459327e185), + SC_(7.1287821322486542352288406677143822472124e188), + SC_(-6.0802931455535899300084711868647745846198e191), + SC_(5.2996776424849923930094291004324726622848e194), + SC_(-4.7194259168745862644364622901337991110376e197), + SC_(4.2928413791402981089416829654107466904552e200), + SC_(-3.9876744968232207443447765554293879510665e203), + SC_(3.7819780419358882713894418116139332789822e206), + SC_(-3.6614233683681191243685808215119734875519e209), + SC_(3.6176090272372862348855460929891408947754e212), SC_(-3.6470772645191354362138308865549944904868e215), }; diff --git a/test/test_bernoulli_pdf_double.cu b/test/test_bernoulli_pdf_double.cu new file mode 100644 index 0000000000..147e2f3401 --- /dev/null +++ b/test/test_bernoulli_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include <iostream> +#include <vector> +#include <exception> +#include <boost/math/distributions/bernoulli.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/random/mersenne_twister.hpp> +#include <boost/random/uniform_real_distribution.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::bernoulli_distribution<float_type>(in1[i]), static_cast<float_type>(1)); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution<float_type> dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::bernoulli_distribution<float_type>(input_vector1[i]), static_cast<float_type>(1))); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_bernoulli_pdf_float.cu b/test/test_bernoulli_pdf_float.cu new file mode 100644 index 0000000000..49eaea32f9 --- /dev/null +++ b/test/test_bernoulli_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include <iostream> +#include <vector> +#include <exception> +#include <boost/math/distributions/bernoulli.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/random/mersenne_twister.hpp> +#include <boost/random/uniform_real_distribution.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::bernoulli_distribution<float_type>(in1[i]), static_cast<float_type>(1)); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution<float_type> dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::bernoulli_distribution<float_type>(input_vector1[i]), static_cast<float_type>(1))); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_bernoulli_pdf_nvrtc_double.cpp b/test/test_bernoulli_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..f3e21a0951 --- /dev/null +++ b/test/test_bernoulli_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <nvrtc.h> +#include <cuda.h> +#include <cuda_runtime.h> + +#include <boost/math/distributions/bernoulli.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/math/special_functions/fpclassify.hpp> +#include <iostream> +#include <cstdlib> +#include <cmath> +#include <random> +#include <exception> + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include <boost/math/distributions/bernoulli.hpp> +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::bernoulli_distribution<float_type>(), round(in1[i])); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", 
"--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::bernoulli_distribution(), round(h_in1[i])); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bernoulli_pdf_nvrtc_float.cpp b/test/test_bernoulli_pdf_nvrtc_float.cpp new file mode 100644 index 0000000000..bf9b760168 --- /dev/null +++ b/test/test_bernoulli_pdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. 
+// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <nvrtc.h> +#include <cuda.h> +#include <cuda_runtime.h> + +#include <boost/math/distributions/bernoulli.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/math/special_functions/fpclassify.hpp> +#include <iostream> +#include <cstdlib> +#include <cmath> +#include <random> +#include <exception> + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include <boost/math/distributions/bernoulli.hpp> +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::bernoulli_distribution<float_type>(), round(in1[i])); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + 
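+        // Draws from [0, 1) are rounded to {0, 1} on the device, giving valid Bernoulli outcomes for the PDF call.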
std::uniform_real_distribution<float_type> dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); + h_in2[i] = static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::bernoulli_distribution<float_type>(), round(h_in1[i])); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n  Serial: " << res + << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bernoulli_quan_nvrtc_double.cpp b/test/test_bernoulli_quan_nvrtc_double.cpp new file mode 100644 index 0000000000..dcdd0e1f1b --- /dev/null +++ b/test/test_bernoulli_quan_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::bernoulli_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::bernoulli_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bernoulli_quan_nvrtc_float.cpp b/test/test_bernoulli_quan_nvrtc_float.cpp new file mode 100644 index 0000000000..24c4923818 --- /dev/null +++ b/test/test_bernoulli_quan_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::bernoulli_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::bernoulli_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bernoulli_range_support_double.cu b/test/test_bernoulli_range_support_double.cu new file mode 100644 index 0000000000..ade952fca3 --- /dev/null +++ b/test/test_bernoulli_range_support_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/bernoulli.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type* in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = range(boost::math::bernoulli_distribution<float_type>(in1[i])).first + support(boost::math::bernoulli_distribution<float_type>(in1[i])).second;
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(range(boost::math::bernoulli_distribution<float_type>(input_vector1[i])).first + support(boost::math::bernoulli_distribution<float_type>(input_vector1[i])).second);
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/test/test_bernoulli_range_support_float.cu b/test/test_bernoulli_range_support_float.cu
new file mode 100644
index 0000000000..ef276b9384
--- /dev/null
+++ b/test/test_bernoulli_range_support_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type* in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = range(boost::math::bernoulli_distribution(in1[i])).first + support(boost::math::bernoulli_distribution(in1[i])).second; + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(range(boost::math::bernoulli_distribution(input_vector1[i])).first + support(boost::math::bernoulli_distribution(input_vector1[i])).second); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_bessel_i.cpp b/test/test_bessel_i.cpp index 68dcab0a5d..817569760a 100644 --- a/test/test_bessel_i.cpp +++ b/test/test_bessel_i.cpp @@ -3,7 +3,21 @@ // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef SYCL_LANGUAGE_VERSION #include +#else +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false +#include +#endif + +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wliteral-range" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Woverflow" +#endif + #include "test_bessel_i.hpp" // @@ -82,7 +96,11 @@ void expected_results() "linux", // platform largest_type, // test type(s) ".*Random.*", // test data group + #ifdef SYCL_LANGUAGE_VERSION + ".*", 600, 200); + #else ".*", 400, 200); // test function + #endif add_expected_result( "GNU.*", // compiler @@ -111,7 +129,11 @@ void expected_results() ".*", // platform largest_type, // test type(s) ".*", // test data group + #ifdef SYCL_LANGUAGE_VERSION + ".*", 400, 200); + #else ".*", 20, 10); // test function + #endif // // Set error rates a little higher for real_concept - // now that we use a series approximation for small z diff --git a/test/test_bessel_i.hpp b/test/test_bessel_i.hpp index 2da559f320..aa4f6a4ea3 100644 --- a/test/test_bessel_i.hpp +++ b/test/test_bessel_i.hpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include "functor.hpp" @@ -180,7 +181,10 @@ void test_bessel(T, const char* name) // // Special cases for full coverage: // + #ifndef BOOST_MATH_NO_EXCEPTIONS BOOST_CHECK_THROW(boost::math::cyl_bessel_i(T(-2.5), T(-2.5)), std::domain_error); + #endif + BOOST_CHECK_EQUAL(boost::math::cyl_bessel_i(T(0), T(0)), T(1)); BOOST_CHECK_EQUAL(boost::math::cyl_bessel_i(T(10), T(0)), T(0)); BOOST_CHECK_EQUAL(boost::math::cyl_bessel_i(T(-10), T(0)), T(0)); @@ -197,10 +201,12 @@ void test_bessel(T, const char* name) } } T tolerance = boost::math::tools::epsilon() * 100; +#ifndef SYCL_LANGUAGE_VERSION if ((boost::math::tools::digits() <= std::numeric_limits::digits) && (std::numeric_limits::max_exponent > 1000)) { BOOST_CHECK_CLOSE_FRACTION(boost::math::cyl_bessel_i(T(0.5), T(710)), SC_(3.3447452278080108123142599104927325061327359278058601201179e306), tolerance); } +#endif #if LDBL_MAX_EXP >= 11356 BOOST_IF_CONSTEXPR (std::numeric_limits::max_exponent >= 11356) { diff --git a/test/test_bessel_i0_double.cu b/test/test_bessel_i0_double.cu new file mode 100644 index 0000000000..1c5d0ca14b --- /dev/null +++ b/test/test_bessel_i0_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/detail/bessel_i0.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_i0(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::detail::bessel_i0(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_bessel_i0_float.cu b/test/test_bessel_i0_float.cu
new file mode 100644
index 0000000000..39929d5481
--- /dev/null
+++ b/test/test_bessel_i0_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::detail::bessel_i0(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::detail::bessel_i0(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_bessel_i0_nvrtc_double.cpp b/test/test_bessel_i0_nvrtc_double.cpp new file mode 100644 index 0000000000..0c5db47b49 --- /dev/null +++ b/test/test_bessel_i0_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_bessel_i0_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::detail::bessel_i0(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_i0_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_bessel_i0_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_i0_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::detail::bessel_i0(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bessel_i0_nvrtc_float.cpp b/test/test_bessel_i0_nvrtc_float.cpp new file mode 100644 index 0000000000..26d667b973 --- /dev/null +++ b/test/test_bessel_i0_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_bessel_i0_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::detail::bessel_i0(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_i0_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_bessel_i0_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_i0_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::detail::bessel_i0(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bessel_i1_double.cu b/test/test_bessel_i1_double.cu new file mode 100644 index 0000000000..e4d6443a68 --- /dev/null +++ b/test/test_bessel_i1_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::detail::bessel_i1(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::detail::bessel_i1(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_bessel_i1_float.cu b/test/test_bessel_i1_float.cu new file mode 100644 index 0000000000..12ae535428 --- /dev/null +++ b/test/test_bessel_i1_float.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::detail::bessel_i1(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::detail::bessel_i1(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_bessel_i1_nvrtc_double.cpp b/test/test_bessel_i1_nvrtc_double.cpp new file mode 100644 index 0000000000..c270a66940 --- /dev/null +++ b/test/test_bessel_i1_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_bessel_i1_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::detail::bessel_i1(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_i1_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_bessel_i1_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_i1_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::detail::bessel_i1(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bessel_i1_nvrtc_float.cpp b/test/test_bessel_i1_nvrtc_float.cpp new file mode 100644 index 0000000000..158c6a8159 --- /dev/null +++ b/test/test_bessel_i1_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_bessel_i1_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::detail::bessel_i1(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_i1_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_bessel_i1_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_i1_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::detail::bessel_i1(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_bessel_j.cpp b/test/test_bessel_j.cpp
index 19a5f7426e..1dd63a68a5 100644
--- a/test/test_bessel_j.cpp
+++ b/test/test_bessel_j.cpp
@@ -3,7 +3,20 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#else
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+#include <boost/math/special_functions/bessel.hpp>
+#endif
+
+#ifdef __clang__
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Woverflow"
+#endif
 
 #include "test_bessel_j.hpp"
diff --git a/test/test_bessel_j.hpp b/test/test_bessel_j.hpp
index 82106213ea..c0b719ad89 100644
--- a/test/test_bessel_j.hpp
+++ b/test/test_bessel_j.hpp
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -279,7 +280,9 @@ void test_bessel(T, const char* name)
    BOOST_MATH_CHECK_THROW(boost::math::sph_bessel(2, T(-2.0)), std::domain_error);
    BOOST_CHECK_EQUAL(boost::math::cyl_bessel_j(T(0), T(2.5)), boost::math::cyl_bessel_j(T(0), T(-2.5)));
    BOOST_CHECK_EQUAL(boost::math::cyl_bessel_j(T(1), T(2.5)), -boost::math::cyl_bessel_j(T(1), T(-2.5)));
+   #ifndef SYCL_LANGUAGE_VERSION
    BOOST_CHECK_CLOSE_FRACTION(boost::math::cyl_bessel_j(364, T(38.5)), SC_(1.793940496519190500748409872348034004417458734118663909894e-309), tolerance);
+   #endif
    //
    // Special cases at infinity:
    //
diff --git a/test/test_bessel_j0_double.cu b/test/test_bessel_j0_double.cu
new file mode 100644
index 0000000000..d32474d964
--- /dev/null
+++ b/test/test_bessel_j0_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_j0(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::detail::bessel_j0(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_bessel_j0_float.cu b/test/test_bessel_j0_float.cu
new file mode 100644
index 0000000000..48c6b9e399
--- /dev/null
+++ b/test/test_bessel_j0_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_j0(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::detail::bessel_j0(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_bessel_j0_nvrtc_double.cpp b/test/test_bessel_j0_nvrtc_double.cpp
new file mode 100644
index 0000000000..8c8b798410
--- /dev/null
+++ b/test/test_bessel_j0_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cmath>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_j0_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_j0(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_j0_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_bessel_j0_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
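+        // cuModuleLoadDataEx JIT-compiles the PTX emitted by NVRTC for the device
+        // behind the current context; because the kernel is declared extern "C" in
+        // the source string, its symbol is unmangled and can be looked up by name.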
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_j0_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::detail::bessel_j0(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_bessel_j0_nvrtc_float.cpp b/test/test_bessel_j0_nvrtc_float.cpp
new file mode 100644
index 0000000000..4a54b1eaa8
--- /dev/null
+++ b/test/test_bessel_j0_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cmath>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_j0_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_j0(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_j0_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_bessel_j0_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_j0_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::detail::bessel_j0(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_bessel_j1_double.cu b/test/test_bessel_j1_double.cu
new file mode 100644
index 0000000000..33a6e71b6e
--- /dev/null
+++ b/test/test_bessel_j1_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_j1(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::detail::bessel_j1(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_bessel_j1_float.cu b/test/test_bessel_j1_float.cu
new file mode 100644
index 0000000000..14dd37be31
--- /dev/null
+++ b/test/test_bessel_j1_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_j1(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::detail::bessel_j1(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_bessel_j1_nvrtc_double.cpp b/test/test_bessel_j1_nvrtc_double.cpp
new file mode 100644
index 0000000000..11460c11da
--- /dev/null
+++ b/test/test_bessel_j1_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cmath>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_j1_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_j1(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_j1_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_bessel_j1_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_j1_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::detail::bessel_j1(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_bessel_j1_nvrtc_float.cpp b/test/test_bessel_j1_nvrtc_float.cpp
new file mode 100644
index 0000000000..8f7cc6e3fe
--- /dev/null
+++ b/test/test_bessel_j1_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cmath>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_j1_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_j1(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_j1_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_bessel_j1_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_j1_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::detail::bessel_j1(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_bessel_k.cpp b/test/test_bessel_k.cpp
index f0975b46d2..6c31f5ab05 100644
--- a/test/test_bessel_k.cpp
+++ b/test/test_bessel_k.cpp
@@ -5,13 +5,26 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#else
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+#include <boost/math/special_functions/bessel.hpp>
+#endif
 
 #ifdef _MSC_VER
 #  pragma warning(disable : 4756) // overflow in constant arithmetic
 // Constants are too big for float case, but this doesn't matter for test.
 #endif
 
+#ifdef __clang__
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Woverflow"
+#endif
+
 #include "test_bessel_k.hpp"
 
 //
diff --git a/test/test_bessel_k.hpp b/test/test_bessel_k.hpp
index 22df3218f0..6a2a8179d9 100644
--- a/test/test_bessel_k.hpp
+++ b/test/test_bessel_k.hpp
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include "functor.hpp"
@@ -175,6 +176,7 @@ void test_bessel(T, const char* name)
    //
    // Extra test coverage:
    //
+   #ifndef SYCL_LANGUAGE_VERSION // SYCL doesn't throw
    BOOST_CHECK_THROW(boost::math::cyl_bessel_k(T(2), T(-1)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::cyl_bessel_k(T(2.2), T(-1)), std::domain_error);
    BOOST_IF_CONSTEXPR(std::numeric_limits<T>::has_infinity)
@@ -194,6 +196,7 @@ void test_bessel(T, const char* name)
    BOOST_CHECK_THROW(boost::math::cyl_bessel_k(T(-1.25), T(0)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::cyl_bessel_k(T(-1), T(0)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::cyl_bessel_k(T(1), T(0)), std::domain_error);
+   #endif
 }
diff --git a/test/test_bessel_k0_double.cu b/test/test_bessel_k0_double.cu
new file mode 100644
index 0000000000..26d0e2bffa
--- /dev/null
+++ b/test/test_bessel_k0_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_k0(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::detail::bessel_k0(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_bessel_k0_float.cu b/test/test_bessel_k0_float.cu
new file mode 100644
index 0000000000..ffe59c25bd
--- /dev/null
+++ b/test/test_bessel_k0_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_k0(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::detail::bessel_k0(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_bessel_k0_nvrtc_double.cpp b/test/test_bessel_k0_nvrtc_double.cpp
new file mode 100644
index 0000000000..d412212125
--- /dev/null
+++ b/test/test_bessel_k0_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cmath>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_k0_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_k0(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_k0_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_bessel_k0_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
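+        // With the driver API the launch configuration is handed straight to
+        // cuLaunchKernel further down (grid and block dimensions, zero bytes of
+        // dynamic shared memory, the default stream), and the kernel arguments
+        // travel as an array of pointers, so no <<<...>>> syntax appears here.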
checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_k0_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::detail::bessel_k0(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bessel_k0_nvrtc_float.cpp b/test/test_bessel_k0_nvrtc_float.cpp new file mode 100644 index 0000000000..389fce21a4 --- /dev/null +++ b/test/test_bessel_k0_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_bessel_k0_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::detail::bessel_k0(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_k0_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_bessel_k0_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_k0_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::detail::bessel_k0(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bessel_k1_double.cu b/test/test_bessel_k1_double.cu new file mode 100644 index 0000000000..ed1b353d93 --- /dev/null +++ b/test/test_bessel_k1_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::detail::bessel_k1(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::detail::bessel_k1(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_bessel_k1_float.cu b/test/test_bessel_k1_float.cu new file mode 100644 index 0000000000..65fd802f22 --- /dev/null +++ b/test/test_bessel_k1_float.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::detail::bessel_k1(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::detail::bessel_k1(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_bessel_k1_nvrtc_double.cpp b/test/test_bessel_k1_nvrtc_double.cpp new file mode 100644 index 0000000000..1e0f1e7f4b --- /dev/null +++ b/test/test_bessel_k1_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
diff --git a/test/test_bessel_k1_nvrtc_double.cpp b/test/test_bessel_k1_nvrtc_double.cpp
new file mode 100644
index 0000000000..1e0f1e7f4b
--- /dev/null
+++ b/test/test_bessel_k1_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_k1_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_k1(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_k1_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_bessel_k1_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_k1_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::detail::bessel_k1(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
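
One detail of the driver-API launch used throughout the NVRTC tests is worth calling out: cuLaunchKernel receives kernel arguments as an array of host pointers, one per kernel parameter, each pointing at the variable that holds the argument's value; for buffer parameters that is the address of the host variable holding the device pointer, not the device pointer itself. Annotated, the launch in these tests reads:

    // Each args[i] is the address of a host variable; the driver copies the
    // pointed-to value into the corresponding kernel parameter at launch.
    void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
    cuLaunchKernel(kernel,
                   numBlocks, 1, 1,  // grid dimensions (x, y, z)
                   blockSize, 1, 1,  // block dimensions (x, y, z)
                   0,                // dynamic shared memory in bytes
                   0,                // stream (0 = default stream)
                   args,             // kernel parameter array
                   0);               // extra launch options (unused)
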
diff --git a/test/test_bessel_k1_nvrtc_float.cpp b/test/test_bessel_k1_nvrtc_float.cpp
new file mode 100644
index 0000000000..1422a58869
--- /dev/null
+++ b/test/test_bessel_k1_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_k1_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_k1(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_k1_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_bessel_k1_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_k1_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::detail::bessel_k1(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_bessel_kn_double.cu b/test/test_bessel_kn_double.cu
new file mode 100644
index 0000000000..d15ba73041
--- /dev/null
+++ b/test/test_bessel_kn_double.cu
@@ -0,0 +1,105 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    boost::math::policies::policy<> pol;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_kn(2, in[i], pol);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    boost::math::policies::policy<> pol;
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+    {
+        results.push_back(boost::math::detail::bessel_kn(2, input_vector[i], pol));
+    }
+
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_bessel_kn_float.cu b/test/test_bessel_kn_float.cu
new file mode 100644
index 0000000000..d15ba73041
--- /dev/null
+++ b/test/test_bessel_kn_float.cu
@@ -0,0 +1,105 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    boost::math::policies::policy<> pol;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_kn(2, in[i], pol);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    boost::math::policies::policy<> pol;
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+    {
+        results.push_back(boost::math::detail::bessel_kn(2, input_vector[i], pol));
+    }
+
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_bessel_kn_nvrtc_double.cpp b/test/test_bessel_kn_nvrtc_double.cpp
new file mode 100644
index 0000000000..3b581f77c3
--- /dev/null
+++ b/test/test_bessel_kn_nvrtc_double.cpp
@@ -0,0 +1,192 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_kn_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    boost::math::policies::policy<> pol;
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_kn(2, in1[i], pol);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_kn_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_bessel_kn_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_kn_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        boost::math::policies::policy<> pol;
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::detail::bessel_kn(2, h_in1[i], pol);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_bessel_kn_nvrtc_float.cpp b/test/test_bessel_kn_nvrtc_float.cpp
new file mode 100644
index 0000000000..dcc987a70a
--- /dev/null
+++ b/test/test_bessel_kn_nvrtc_float.cpp
@@ -0,0 +1,192 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_kn_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    boost::math::policies::policy<> pol;
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_kn(2, in1[i], pol);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_kn_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_bessel_kn_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_kn_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        boost::math::policies::policy<> pol;
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::detail::bessel_kn(2, h_in1[i], pol);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_bessel_y.cpp b/test/test_bessel_y.cpp
index 83c24b95f4..8251920c5b 100644
--- a/test/test_bessel_y.cpp
+++ b/test/test_bessel_y.cpp
@@ -3,7 +3,20 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#else
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+#include <boost/math/special_functions/bessel.hpp>
+#endif
+
+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+#  pragma GCC diagnostic push
+#  pragma GCC diagnostic ignored "-Woverflow"
+#endif
 
 #include "test_bessel_y.hpp"
 
@@ -234,7 +247,11 @@ void expected_results()
       ".*",                          // platform
       largest_type,                  // test type(s)
      ".*(Y[nv]|y).*Random.*",       // test data group
+      #ifdef SYCL_LANGUAGE_VERSION
+      ".*", 2000, 1000);
+      #else
       ".*", 1500, 1000);             // test function
+      #endif
   //
   // Fallback for sun has to go after the general cases above:
   //
diff --git a/test/test_bessel_y.hpp b/test/test_bessel_y.hpp
index 28361a227c..14b0be4564 100644
--- a/test/test_bessel_y.hpp
+++ b/test/test_bessel_y.hpp
@@ -9,6 +9,7 @@
 #include <boost/math/special_functions/bessel.hpp>
 #include <boost/math/tools/stats.hpp>
 #include <boost/math/tools/test.hpp>
+#include <boost/math/tools/config.hpp>
 #include <boost/math/constants/constants.hpp>
 #include <boost/type_traits/is_floating_point.hpp>
 #include <boost/array.hpp>
@@ -241,10 +242,12 @@ void test_bessel(T, const char* name)
       BOOST_CHECK_EQUAL(boost::math::sph_neumann(2, std::numeric_limits<T>::infinity()), T(0));
    }
 
+   #ifndef BOOST_MATH_NO_EXCEPTIONS
    BOOST_CHECK_THROW(boost::math::cyl_neumann(T(0), T(-1)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::cyl_neumann(T(0.2), T(-1)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::cyl_neumann(T(2), T(0)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::sph_neumann(2, T(-2)), std::domain_error);
+   #endif
 #if LDBL_MAX_EXP > 1024
    if (std::numeric_limits<T>::max_exponent > 1024)
    {
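
All of the verification loops in these tests compare device output against a host-side reference with boost::math::epsilon_difference (from <boost/math/special_functions/relative_difference.hpp>), which expresses the difference between two values in multiples of machine epsilon for the type; the .cu tests accept up to 10 epsilons and the NVRTC tests up to 300. A host-only illustration:

    // epsilon_difference(a, b) is roughly |a - b| measured in units of
    // machine epsilon for the argument type.
    #include <iostream>
    #include <limits>
    #include <boost/math/special_functions/relative_difference.hpp>

    int main()
    {
        double a = 1.0;
        double b = 1.0 + 3 * std::numeric_limits<double>::epsilon();
        // Prints approximately 3: the values differ by ~3 machine epsilons
        std::cout << boost::math::epsilon_difference(a, b) << std::endl;
    }
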
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + boost::math::policies::policy<> pol; + if (i < numElements) + { + out[i] = boost::math::detail::bessel_y0(in[i], pol); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + + boost::math::policies::policy<> pol; + for(int i = 0; i < numElements; ++i) + { + results.push_back(boost::math::detail::bessel_y0(input_vector[i], pol)); + } + + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_bessel_y0_float.cu b/test/test_bessel_y0_float.cu new file mode 100644 index 0000000000..c8deada7d7 --- /dev/null +++ b/test/test_bessel_y0_float.cu @@ -0,0 +1,106 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + boost::math::policies::policy<> pol; + if (i < numElements) + { + out[i] = boost::math::detail::bessel_y0(in[i], pol); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + + boost::math::policies::policy<> pol; + for(int i = 0; i < numElements; ++i) + { + results.push_back(boost::math::detail::bessel_y0(input_vector[i], pol)); + } + + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_bessel_y0_nvrtc_double.cpp b/test/test_bessel_y0_nvrtc_double.cpp new file mode 100644 index 0000000000..8645a0fdd0 --- /dev/null +++ b/test/test_bessel_y0_nvrtc_double.cpp @@ -0,0 +1,194 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +#include +extern "C" __global__ +void test_bessel_k0_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + boost::math::policies::policy<> pol; + if (i < numElements) + { + out[i] = boost::math::detail::bessel_y0(in1[i], pol); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_k0_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_bessel_k0_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_k0_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + boost::math::policies::policy<> pol; + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::detail::bessel_y0(h_in1[i], pol); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bessel_y0_nvrtc_float.cpp b/test/test_bessel_y0_nvrtc_float.cpp new file mode 100644 index 0000000000..75a065bd6c --- /dev/null +++ b/test/test_bessel_y0_nvrtc_float.cpp @@ -0,0 +1,194 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +#include +extern "C" __global__ +void test_bessel_k0_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + boost::math::policies::policy<> pol; + if (i < numElements) + { + out[i] = boost::math::detail::bessel_y0(in1[i], pol); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_k0_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_bessel_k0_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_k0_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + boost::math::policies::policy<> pol; + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::detail::bessel_y0(h_in1[i], pol); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bessel_y1_double.cu b/test/test_bessel_y1_double.cu new file mode 100644 index 0000000000..a5b3051b40 --- /dev/null +++ b/test/test_bessel_y1_double.cu @@ -0,0 +1,106 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + boost::math::policies::policy<> pol; + if (i < numElements) + { + out[i] = boost::math::detail::bessel_y1(in[i], pol); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + + boost::math::policies::policy<> pol; + for(int i = 0; i < numElements; ++i) + { + results.push_back(boost::math::detail::bessel_y1(input_vector[i], pol)); + } + + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_bessel_y1_float.cu b/test/test_bessel_y1_float.cu new file mode 100644 index 0000000000..532aaf328d --- /dev/null +++ b/test/test_bessel_y1_float.cu @@ -0,0 +1,106 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+   using std::cos;
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+   boost::math::policies::policy<> pol;
+   if (i < numElements)
+   {
+      out[i] = boost::math::detail::bessel_y1(in[i], pol);
+   }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+   // Error code to check return values for CUDA calls
+   cudaError_t err = cudaSuccess;
+
+   // Print the vector length to be used, and compute its size
+   int numElements = 50000;
+   std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+   // Allocate the managed input vector A
+   cuda_managed_ptr<float_type> input_vector(numElements);
+
+   // Allocate the managed output vector C
+   cuda_managed_ptr<float_type> output_vector(numElements);
+
+   // Initialize the input vectors
+   for (int i = 0; i < numElements; ++i)
+   {
+      input_vector[i] = rand()/(float_type)RAND_MAX;
+   }
+
+   // Launch the test CUDA kernel
+   int threadsPerBlock = 1024;
+   int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+   std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+   watch w;
+
+   cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+   cudaDeviceSynchronize();
+
+   std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+   err = cudaGetLastError();
+
+   if (err != cudaSuccess)
+   {
+      std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+      return EXIT_FAILURE;
+   }
+
+   // Verify that the result vector is correct
+   std::vector<float_type> results;
+   results.reserve(numElements);
+   w.reset();
+
+   boost::math::policies::policy<> pol;
+   for(int i = 0; i < numElements; ++i)
+   {
+      results.push_back(boost::math::detail::bessel_y1(input_vector[i], pol));
+   }
+
+   double t = w.elapsed();
+   // check the results
+   for(int i = 0; i < numElements; ++i)
+   {
+      if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+      {
+         std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+         return EXIT_FAILURE;
+      }
+   }
+
+   std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+   std::cout << "Done\n";
+
+   return 0;
+}
diff --git a/test/test_bessel_y1_nvrtc_double.cpp b/test/test_bessel_y1_nvrtc_double.cpp
new file mode 100644
index 0000000000..383d879eb1
--- /dev/null
+++ b/test/test_bessel_y1_nvrtc_double.cpp
@@ -0,0 +1,194 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/policies/policy.hpp>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_k0_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+   boost::math::policies::policy<> pol;
+   if (i < numElements)
+   {
+      out[i] = boost::math::detail::bessel_y1(in1[i], pol);
+   }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+   if (result != cudaSuccess)
+   {
+      std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+   if (result != CUDA_SUCCESS)
+   {
+      const char* errorStr;
+      cuGetErrorString(result, &errorStr);
+      std::cerr << msg << ": " << errorStr << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+   if (result != NVRTC_SUCCESS)
+   {
+      std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+int main()
+{
+   try
+   {
+      // Initialize CUDA driver API
+      checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+      // Create CUDA context
+      CUcontext context;
+      CUdevice device;
+      checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+      checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+      nvrtcProgram prog;
+      nvrtcResult res;
+
+      res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_k0_kernel.cu", 0, nullptr, nullptr);
+      checkNVRTCError(res, "Failed to create NVRTC program");
+
+      nvrtcAddNameExpression(prog, "test_bessel_k0_kernel");
+
+      #ifdef BOOST_MATH_NVRTC_CI_RUN
+      const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #else
+      const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #endif
+
+      // Compile the program
+      res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+      if (res != NVRTC_SUCCESS)
+      {
+         size_t log_size;
+         nvrtcGetProgramLogSize(prog, &log_size);
+         char* log = new char[log_size];
+         nvrtcGetProgramLog(prog, log);
+         std::cerr << "Compilation failed:\n" << log << std::endl;
+         delete[] log;
+         exit(EXIT_FAILURE);
+      }
+
+      // Get PTX from the program
+      size_t ptx_size;
+      nvrtcGetPTXSize(prog, &ptx_size);
+      char* ptx = new char[ptx_size];
+      nvrtcGetPTX(prog, ptx);
+
+      // Load PTX into CUDA module
+      CUmodule module;
+      CUfunction kernel;
+      checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+      checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_k0_kernel"), "Failed to get kernel function");
+
+      int numElements = 5000;
+      float_type *h_in1, *h_in2, *h_out;
+      float_type *d_in1, *d_in2, *d_out;
+
+      // Allocate memory on the host
+      h_in1 = new float_type[numElements];
+      h_in2 = new float_type[numElements];
+      h_out = new float_type[numElements];
+
+      // Initialize input arrays
+      std::mt19937_64 rng(42);
+      std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+      for (int i = 0; i < numElements; ++i)
+      {
+         h_in1[i] = static_cast<float_type>(dist(rng));
+         h_in2[i] = static_cast<float_type>(dist(rng));
+      }
+
+      checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+      checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+      checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+      checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+      checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+      int blockSize = 256;
+      int numBlocks = (numElements + blockSize - 1) / blockSize;
+      void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+      checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+      checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+      // Verify Result
+      boost::math::policies::policy<> pol;
+      for (int i = 0; i < numElements; ++i)
+      {
+         const auto res = boost::math::detail::bessel_y1(h_in1[i], pol);
+
+         if (std::isfinite(res))
+         {
+            if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+            {
+               std::cout << "error at line: " << i
+                         << "\nParallel: " << h_out[i]
+                         << "\n  Serial: " << res
+                         << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+            }
+         }
+      }
+
+      cudaFree(d_in1);
+      cudaFree(d_in2);
+      cudaFree(d_out);
+      delete[] h_in1;
+      delete[] h_in2;
+      delete[] h_out;
+
+      nvrtcDestroyProgram(&prog);
+      delete[] ptx;
+
+      cuCtxDestroy(context);
+
+      std::cout << "Kernel executed successfully." << std::endl;
+      return 0;
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+      return EXIT_FAILURE;
+   }
+}
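The launch configuration used by all of these NVRTC harnesses is the standard ceiling-division idiom, so a full grid is launched even when numElements is not a multiple of blockSize, and the `i < numElements` guard inside the kernel discards the overhang threads. Worked through with the values above:

    // numElements = 5000, blockSize = 256
    int numBlocks = (5000 + 256 - 1) / 256;  // = 5255 / 256 = 20 (integer division)
    // 20 blocks * 256 threads = 5120 threads; threads 5000..5119 fail the
    // bounds check and return without touching out[]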
diff --git a/test/test_bessel_y1_nvrtc_float.cpp b/test/test_bessel_y1_nvrtc_float.cpp
new file mode 100644
index 0000000000..c2c1355e64
--- /dev/null
+++ b/test/test_bessel_y1_nvrtc_float.cpp
@@ -0,0 +1,194 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/policies/policy.hpp>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_k0_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+   boost::math::policies::policy<> pol;
+   if (i < numElements)
+   {
+      out[i] = boost::math::detail::bessel_y1(in1[i], pol);
+   }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+   if (result != cudaSuccess)
+   {
+      std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+   if (result != CUDA_SUCCESS)
+   {
+      const char* errorStr;
+      cuGetErrorString(result, &errorStr);
+      std::cerr << msg << ": " << errorStr << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+   if (result != NVRTC_SUCCESS)
+   {
+      std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+int main()
+{
+   try
+   {
+      // Initialize CUDA driver API
+      checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+      // Create CUDA context
+      CUcontext context;
+      CUdevice device;
+      checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+      checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+      nvrtcProgram prog;
+      nvrtcResult res;
+
+      res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_k0_kernel.cu", 0, nullptr, nullptr);
+      checkNVRTCError(res, "Failed to create NVRTC program");
+
+      nvrtcAddNameExpression(prog, "test_bessel_k0_kernel");
+
+      #ifdef BOOST_MATH_NVRTC_CI_RUN
+      const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #else
+      const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #endif
+
+      // Compile the program
+      res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+      if (res != NVRTC_SUCCESS)
+      {
+         size_t log_size;
+         nvrtcGetProgramLogSize(prog, &log_size);
+         char* log = new char[log_size];
+         nvrtcGetProgramLog(prog, log);
+         std::cerr << "Compilation failed:\n" << log << std::endl;
+         delete[] log;
+         exit(EXIT_FAILURE);
+      }
+
+      // Get PTX from the program
+      size_t ptx_size;
+      nvrtcGetPTXSize(prog, &ptx_size);
+      char* ptx = new char[ptx_size];
+      nvrtcGetPTX(prog, ptx);
+
+      // Load PTX into CUDA module
+      CUmodule module;
+      CUfunction kernel;
+      checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+      checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_k0_kernel"), "Failed to get kernel function");
+
+      int numElements = 5000;
+      float_type *h_in1, *h_in2, *h_out;
+      float_type *d_in1, *d_in2, *d_out;
+
+      // Allocate memory on the host
+      h_in1 = new float_type[numElements];
+      h_in2 = new float_type[numElements];
+      h_out = new float_type[numElements];
+
+      // Initialize input arrays
+      std::mt19937_64 rng(42);
+      std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+      for (int i = 0; i < numElements; ++i)
+      {
+         h_in1[i] = static_cast<float_type>(dist(rng));
+         h_in2[i] = static_cast<float_type>(dist(rng));
+      }
+
+      checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+      checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+      checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+      checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+      checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+      int blockSize = 256;
+      int numBlocks = (numElements + blockSize - 1) / blockSize;
+      void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+      checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+      checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+      // Verify Result
+      boost::math::policies::policy<> pol;
+      for (int i = 0; i < numElements; ++i)
+      {
+         const auto res = boost::math::detail::bessel_y1(h_in1[i], pol);
+
+         if (std::isfinite(res))
+         {
+            if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+            {
+               std::cout << "error at line: " << i
+                         << "\nParallel: " << h_out[i]
+                         << "\n  Serial: " << res
+                         << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+            }
+         }
+      }
+
+      cudaFree(d_in1);
+      cudaFree(d_in2);
+      cudaFree(d_out);
+      delete[] h_in1;
+      delete[] h_in2;
+      delete[] h_out;
+
+      nvrtcDestroyProgram(&prog);
+      delete[] ptx;
+
+      cuCtxDestroy(context);
+
+      std::cout << "Kernel executed successfully." << std::endl;
+      return 0;
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+      return EXIT_FAILURE;
+   }
+}
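The float and double variants of each NVRTC test are otherwise identical: only the two `float_type` typedefs (host side and inside the kernel string) change. That works because the `> 300` tolerance is expressed in units of machine epsilon via boost::math::epsilon_difference, so it scales automatically with the precision of the type under test. For intuition only -- the real implementation in boost/math/special_functions/relative_difference.hpp also handles zeros, infinities and denormals, which this sketch does not:

    #include <cmath>
    #include <limits>
    #include <algorithm>

    // rough sketch: relative difference rescaled to multiples of epsilon;
    // dividing by the smaller magnitude makes the reported error the
    // conservative (larger) one
    template <class T>
    T epsilon_difference_sketch(T a, T b)
    {
       T rel = std::fabs(a - b) / (std::min)(std::fabs(a), std::fabs(b));
       return rel / std::numeric_limits<T>::epsilon();
    }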
diff --git a/test/test_beta.cpp b/test/test_beta.cpp
index b24cb32c07..4e27b71353 100644
--- a/test/test_beta.cpp
+++ b/test/test_beta.cpp
@@ -5,7 +5,17 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#include "pch_light.hpp"
+#ifndef SYCL_LANGUAGE_VERSION
+#include <pch_light.hpp>
+#endif
+
+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+#  pragma GCC diagnostic push
+#  pragma GCC diagnostic ignored "-Woverflow"
+#endif
 
 #include "test_beta.hpp"
diff --git a/test/test_beta.hpp b/test/test_beta.hpp
index 63a07d18f5..362bf51bf2 100644
--- a/test/test_beta.hpp
+++ b/test/test_beta.hpp
@@ -10,13 +10,18 @@
 #  pragma warning (disable : 4180) // qualifier applied to function type has no meaning; ignored
 #endif
 
+#ifdef __CUDACC__
+#pragma nv_diag_suppress 221
+#endif
+
 #include <boost/math/concepts/real_concept.hpp>
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
 #include <boost/test/tools/floating_point_comparison.hpp>
+#include <boost/math/tools/config.hpp>
 #include <boost/math/special_functions/beta.hpp>
 #include <boost/math/constants/constants.hpp>
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include <boost/math/tools/stats.hpp>
 #include <boost/array.hpp>
 #include <iostream>
@@ -100,14 +105,17 @@ void test_spots(T)
    BOOST_CHECK_CLOSE(::boost::math::beta(small, static_cast<T>(4)), 1/small, tolerance);
    BOOST_CHECK_CLOSE(::boost::math::beta(small, small / 2), boost::math::tgamma(small) * boost::math::tgamma(small / 2) / boost::math::tgamma(small + small / 2), tolerance);
    BOOST_CHECK_CLOSE(::boost::math::beta(static_cast<T>(4), static_cast<T>(20)), static_cast<T>(0.00002823263692828910220214568040654997176736L), tolerance);
-   if ((std::numeric_limits<T>::digits < 100) && (std::numeric_limits<T>::digits != 0))
+   if (boost::math::tools::digits<T>() < 100)
    {
       // Inexact input, so disable for ultra precise long doubles:
       BOOST_CHECK_CLOSE(::boost::math::beta(static_cast<T>(0.0125L), static_cast<T>(0.000023L)), static_cast<T>(43558.24045647538375006349016083320744662L), tolerance * 2);
    }
+
+   #ifndef BOOST_MATH_NO_EXCEPTIONS
    BOOST_CHECK_THROW(boost::math::beta(static_cast<T>(0), static_cast<T>(1)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::beta(static_cast<T>(-1), static_cast<T>(1)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::beta(static_cast<T>(1), static_cast<T>(-1)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::beta(static_cast<T>(1), static_cast<T>(0)), std::domain_error);
+   #endif
 }
diff --git a/test/test_beta_dist.cpp b/test/test_beta_dist.cpp
index 943718a39f..1652309eb7 100644
--- a/test/test_beta_dist.cpp
+++ b/test/test_beta_dist.cpp
@@ -32,9 +32,14 @@
 #  pragma warning (disable : 4224) // nonstandard extension used : formal parameter 'arg' was previously defined as a type.
 #endif
 
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
 using ::boost::math::concepts::real_concept;
-#include <boost/math/tools/test.hpp>
+#endif
+
+#include "../include_private/boost/math/tools/test.hpp"
 
 #include <boost/math/distributions/beta.hpp> // for beta_distribution
 using boost::math::beta_distribution;
@@ -634,12 +639,13 @@ BOOST_AUTO_TEST_CASE( test_main )
   BOOST_CHECK_CLOSE_FRACTION(mybeta22.find_alpha(mybeta22.beta(), 0.8, cdf(mybeta22, 0.8)), mybeta22.alpha(), tol);
   BOOST_CHECK_CLOSE_FRACTION(mybeta22.find_beta(mybeta22.alpha(), 0.8, cdf(mybeta22, 0.8)), mybeta22.beta(), tol);
 
-
+  #ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
   beta_distribution<real_concept> rcbeta22(2, 2); // Using RealType real_concept.
   cout << "numeric_limits<real_concept>::is_specialized " << numeric_limits<real_concept>::is_specialized << endl;
   cout << "numeric_limits<real_concept>::digits " << numeric_limits<real_concept>::digits << endl;
   cout << "numeric_limits<real_concept>::digits10 " << numeric_limits<real_concept>::digits10 << endl;
   cout << "numeric_limits<real_concept>::epsilon " << numeric_limits<real_concept>::epsilon() << endl;
+  #endif
 
   // (Parameter value, arbitrarily zero, only communicates the floating point type).
   test_spots(0.0F); // Test float.
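The `numeric_limits<T>::digits < 100 && digits != 0` test above is replaced with boost::math::tools::digits<T>() because the latter also gives a sensible answer for types with no numeric_limits specialization (such as concepts::real_concept, for which `digits` is 0), by falling back on the precision configured in the policy framework. Compare:

    #include <boost/math/tools/precision.hpp>

    // 53 for double; for a type like real_concept this reports the precision
    // Boost.Math actually evaluates at, rather than numeric_limits' unspecialized 0
    int working_bits = boost::math::tools::digits<double>();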
diff --git a/test/test_beta_dist_cdf_double.cu b/test/test_beta_dist_cdf_double.cu
new file mode 100644
index 0000000000..9188f4305f
--- /dev/null
+++ b/test/test_beta_dist_cdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+   using std::cos;
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+   if (i < numElements)
+   {
+      out[i] = cdf(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+   try {
+
+   // Error code to check return values for CUDA calls
+   cudaError_t err = cudaSuccess;
+
+   // Print the vector length to be used, and compute its size
+   int numElements = 50000;
+   std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+   // Allocate the managed input vector A
+   cuda_managed_ptr<float_type> input_vector1(numElements);
+
+   // Allocate the managed output vector C
+   cuda_managed_ptr<float_type> output_vector(numElements);
+
+   boost::random::mt19937 gen;
+   boost::random::uniform_real_distribution<float_type> dist;
+   // Initialize the input vectors
+   for (int i = 0; i < numElements; ++i)
+   {
+      input_vector1[i] = dist(gen);
+   }
+
+   // Launch the test CUDA kernel
+   int threadsPerBlock = 256;
+   int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+   std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+   watch w;
+   cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+   cudaDeviceSynchronize();
+   std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+   err = cudaGetLastError();
+   if (err != cudaSuccess)
+   {
+      std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+      return EXIT_FAILURE;
+   }
+
+   // Verify that the result vector is correct
+   std::vector<float_type> results;
+   results.reserve(numElements);
+   w.reset();
+   for(int i = 0; i < numElements; ++i)
+      results.push_back(cdf(boost::math::beta_distribution<float_type>(), input_vector1[i]));
+   double t = w.elapsed();
+   // check the results
+   for(int i = 0; i < numElements; ++i)
+   {
+      if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+      {
+         std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+         std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+         return EXIT_FAILURE;
+      }
+   }
+
+   std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+   std::cout << "Done\n";
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+   }
+   return 0;
+}
\ No newline at end of file
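cuda_managed_ptr.hpp and stopwatch.hpp are pre-existing test-support headers, not part of this diff. Judging by how they are used (the host loop fills a buffer the kernel then reads, with no explicit cudaMemcpy anywhere), cuda_managed_ptr<T> is presumably a small RAII owner of CUDA unified memory, roughly along these hypothetical lines:

    #include <cuda_runtime.h>
    #include <cstddef>

    // hypothetical sketch only -- the real header is not shown in this patch
    template <class T>
    class cuda_managed_ptr_sketch
    {
       T* ptr_ {};
    public:
       // Unified memory: one allocation visible to both host and device
       explicit cuda_managed_ptr_sketch(std::size_t n) { cudaMallocManaged(&ptr_, n * sizeof(T)); }
       ~cuda_managed_ptr_sketch() { cudaFree(ptr_); }
       T* get() const { return ptr_; }
       T& operator[](std::size_t i) const { return ptr_[i]; }
    };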
diff --git a/test/test_beta_dist_cdf_float.cu b/test/test_beta_dist_cdf_float.cu
new file mode 100644
index 0000000000..0278f64155
--- /dev/null
+++ b/test/test_beta_dist_cdf_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+   using std::cos;
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+   if (i < numElements)
+   {
+      out[i] = cdf(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+   try {
+
+   // Error code to check return values for CUDA calls
+   cudaError_t err = cudaSuccess;
+
+   // Print the vector length to be used, and compute its size
+   int numElements = 50000;
+   std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+   // Allocate the managed input vector A
+   cuda_managed_ptr<float_type> input_vector1(numElements);
+
+   // Allocate the managed output vector C
+   cuda_managed_ptr<float_type> output_vector(numElements);
+
+   boost::random::mt19937 gen;
+   boost::random::uniform_real_distribution<float_type> dist;
+   // Initialize the input vectors
+   for (int i = 0; i < numElements; ++i)
+   {
+      input_vector1[i] = dist(gen);
+   }
+
+   // Launch the test CUDA kernel
+   int threadsPerBlock = 256;
+   int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+   std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+   watch w;
+   cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+   cudaDeviceSynchronize();
+   std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+   err = cudaGetLastError();
+   if (err != cudaSuccess)
+   {
+      std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+      return EXIT_FAILURE;
+   }
+
+   // Verify that the result vector is correct
+   std::vector<float_type> results;
+   results.reserve(numElements);
+   w.reset();
+   for(int i = 0; i < numElements; ++i)
+      results.push_back(cdf(boost::math::beta_distribution<float_type>(), input_vector1[i]));
+   double t = w.elapsed();
+   // check the results
+   for(int i = 0; i < numElements; ++i)
+   {
+      if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+      {
+         std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+         std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+         return EXIT_FAILURE;
+      }
+   }
+
+   std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+   std::cout << "Done\n";
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+   }
+   return 0;
+}
\ No newline at end of file
diff --git a/test/test_beta_dist_cdf_nvrtc_double.cpp b/test/test_beta_dist_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..4f5913c108
--- /dev/null
+++ b/test/test_beta_dist_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/beta.hpp>
+extern "C" __global__
+void test_beta_dist_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+   if (i < numElements)
+   {
+      out[i] = cdf(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+   if (result != cudaSuccess)
+   {
+      std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+   if (result != CUDA_SUCCESS)
+   {
+      const char* errorStr;
+      cuGetErrorString(result, &errorStr);
+      std::cerr << msg << ": " << errorStr << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+   if (result != NVRTC_SUCCESS)
+   {
+      std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+int main()
+{
+   try
+   {
+      // Initialize CUDA driver API
+      checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+      // Create CUDA context
+      CUcontext context;
+      CUdevice device;
+      checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+      checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+      nvrtcProgram prog;
+      nvrtcResult res;
+
+      res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_dist_kernel.cu", 0, nullptr, nullptr);
+      checkNVRTCError(res, "Failed to create NVRTC program");
+
+      nvrtcAddNameExpression(prog, "test_beta_dist_kernel");
+
+      #ifdef BOOST_MATH_NVRTC_CI_RUN
+      const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #else
+      const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #endif
+
+      // Compile the program
+      res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+      if (res != NVRTC_SUCCESS)
+      {
+         size_t log_size;
+         nvrtcGetProgramLogSize(prog, &log_size);
+         char* log = new char[log_size];
+         nvrtcGetProgramLog(prog, log);
+         std::cerr << "Compilation failed:\n" << log << std::endl;
+         delete[] log;
+         exit(EXIT_FAILURE);
+      }
+
+      // Get PTX from the program
+      size_t ptx_size;
+      nvrtcGetPTXSize(prog, &ptx_size);
+      char* ptx = new char[ptx_size];
+      nvrtcGetPTX(prog, ptx);
+
+      // Load PTX into CUDA module
+      CUmodule module;
+      CUfunction kernel;
+      checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+      checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_dist_kernel"), "Failed to get kernel function");
+
+      int numElements = 5000;
+      float_type *h_in1, *h_in2, *h_out;
+      float_type *d_in1, *d_in2, *d_out;
+
+      // Allocate memory on the host
+      h_in1 = new float_type[numElements];
+      h_in2 = new float_type[numElements];
+      h_out = new float_type[numElements];
+
+      // Initialize input arrays
+      std::mt19937_64 rng(42);
+      std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+      for (int i = 0; i < numElements; ++i)
+      {
+         h_in1[i] = static_cast<float_type>(dist(rng));
+         h_in2[i] = static_cast<float_type>(dist(rng));
+      }
+
+      checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+      checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+      checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+      checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+      checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+      int blockSize = 256;
+      int numBlocks = (numElements + blockSize - 1) / blockSize;
+      void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+      checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+      checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+      // Verify Result
+      for (int i = 0; i < numElements; ++i)
+      {
+         auto res = cdf(boost::math::beta_distribution<float_type>(), h_in1[i]);
+
+         if (boost::math::isfinite(res))
+         {
+            if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+            {
+               std::cout << "error at line: " << i
+                         << "\nParallel: " << h_out[i]
+                         << "\n  Serial: " << res
+                         << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+            }
+         }
+      }
+
+      cudaFree(d_in1);
+      cudaFree(d_in2);
+      cudaFree(d_out);
+      delete[] h_in1;
+      delete[] h_in2;
+      delete[] h_out;
+
+      nvrtcDestroyProgram(&prog);
+      delete[] ptx;
+
+      cuCtxDestroy(context);
+
+      std::cout << "Kernel executed successfully." << std::endl;
+      return 0;
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+      return EXIT_FAILURE;
+   }
+}
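Every *_nvrtc_* harness keeps a single four-argument kernel shape, even for unary functions: the second pointer parameter is unnamed and ignored, but h_in2/d_in2 are still allocated and copied so that the same marshalling code works for every test.

    // shared launcher signature; one-argument functions such as bessel_y1
    // or a distribution's cdf simply ignore the unnamed second input
    extern "C" __global__
    void test_kernel(const float_type* in1, const float_type* /*unused*/, float_type* out, int numElements);

The cost is one redundant 5000-element allocation and copy per test, traded for boilerplate that stays identical across files.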
diff --git a/test/test_beta_dist_cdf_nvrtc_float.cpp b/test/test_beta_dist_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..f5b031c5a9
--- /dev/null
+++ b/test/test_beta_dist_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/beta.hpp>
+extern "C" __global__
+void test_beta_dist_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+   if (i < numElements)
+   {
+      out[i] = cdf(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+   if (result != cudaSuccess)
+   {
+      std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+   if (result != CUDA_SUCCESS)
+   {
+      const char* errorStr;
+      cuGetErrorString(result, &errorStr);
+      std::cerr << msg << ": " << errorStr << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+   if (result != NVRTC_SUCCESS)
+   {
+      std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+int main()
+{
+   try
+   {
+      // Initialize CUDA driver API
+      checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+      // Create CUDA context
+      CUcontext context;
+      CUdevice device;
+      checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+      checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+      nvrtcProgram prog;
+      nvrtcResult res;
+
+      res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_dist_kernel.cu", 0, nullptr, nullptr);
+      checkNVRTCError(res, "Failed to create NVRTC program");
+
+      nvrtcAddNameExpression(prog, "test_beta_dist_kernel");
+
+      #ifdef BOOST_MATH_NVRTC_CI_RUN
+      const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #else
+      const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #endif
+
+      // Compile the program
+      res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+      if (res != NVRTC_SUCCESS)
+      {
+         size_t log_size;
+         nvrtcGetProgramLogSize(prog, &log_size);
+         char* log = new char[log_size];
+         nvrtcGetProgramLog(prog, log);
+         std::cerr << "Compilation failed:\n" << log << std::endl;
+         delete[] log;
+         exit(EXIT_FAILURE);
+      }
+
+      // Get PTX from the program
+      size_t ptx_size;
+      nvrtcGetPTXSize(prog, &ptx_size);
+      char* ptx = new char[ptx_size];
+      nvrtcGetPTX(prog, ptx);
+
+      // Load PTX into CUDA module
+      CUmodule module;
+      CUfunction kernel;
+      checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+      checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_dist_kernel"), "Failed to get kernel function");
+
+      int numElements = 5000;
+      float_type *h_in1, *h_in2, *h_out;
+      float_type *d_in1, *d_in2, *d_out;
+
+      // Allocate memory on the host
+      h_in1 = new float_type[numElements];
+      h_in2 = new float_type[numElements];
+      h_out = new float_type[numElements];
+
+      // Initialize input arrays
+      std::mt19937_64 rng(42);
+      std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+      for (int i = 0; i < numElements; ++i)
+      {
+         h_in1[i] = static_cast<float_type>(dist(rng));
+         h_in2[i] = static_cast<float_type>(dist(rng));
+      }
+
+      checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+      checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+      checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+      checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+      checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+      int blockSize = 256;
+      int numBlocks = (numElements + blockSize - 1) / blockSize;
+      void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+      checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+      checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+      // Verify Result
+      for (int i = 0; i < numElements; ++i)
+      {
+         auto res = cdf(boost::math::beta_distribution<float_type>(), h_in1[i]);
+
+         if (boost::math::isfinite(res))
+         {
+            if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+            {
+               std::cout << "error at line: " << i
+                         << "\nParallel: " << h_out[i]
+                         << "\n  Serial: " << res
+                         << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+            }
+         }
+      }
+
+      cudaFree(d_in1);
+      cudaFree(d_in2);
+      cudaFree(d_out);
+      delete[] h_in1;
+      delete[] h_in2;
+      delete[] h_out;
+
+      nvrtcDestroyProgram(&prog);
+      delete[] ptx;
+
+      cuCtxDestroy(context);
+
+      std::cout << "Kernel executed successfully." << std::endl;
+      return 0;
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+      return EXIT_FAILURE;
+   }
+}
diff --git a/test/test_beta_dist_pdf_double.cu b/test/test_beta_dist_pdf_double.cu
new file mode 100644
index 0000000000..e86cf94dd8
--- /dev/null
+++ b/test/test_beta_dist_pdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+   using std::cos;
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+   if (i < numElements)
+   {
+      out[i] = pdf(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+   try {
+
+   // Error code to check return values for CUDA calls
+   cudaError_t err = cudaSuccess;
+
+   // Print the vector length to be used, and compute its size
+   int numElements = 50000;
+   std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+   // Allocate the managed input vector A
+   cuda_managed_ptr<float_type> input_vector1(numElements);
+
+   // Allocate the managed output vector C
+   cuda_managed_ptr<float_type> output_vector(numElements);
+
+   boost::random::mt19937 gen;
+   boost::random::uniform_real_distribution<float_type> dist;
+   // Initialize the input vectors
+   for (int i = 0; i < numElements; ++i)
+   {
+      input_vector1[i] = dist(gen);
+   }
+
+   // Launch the test CUDA kernel
+   int threadsPerBlock = 256;
+   int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+   std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+   watch w;
+   cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+   cudaDeviceSynchronize();
+   std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+   err = cudaGetLastError();
+   if (err != cudaSuccess)
+   {
+      std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+      return EXIT_FAILURE;
+   }
+
+   // Verify that the result vector is correct
+   std::vector<float_type> results;
+   results.reserve(numElements);
+   w.reset();
+   for(int i = 0; i < numElements; ++i)
+      results.push_back(pdf(boost::math::beta_distribution<float_type>(), input_vector1[i]));
+   double t = w.elapsed();
+   // check the results
+   for(int i = 0; i < numElements; ++i)
+   {
+      if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+      {
+         std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+         std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+         return EXIT_FAILURE;
+      }
+   }
+
+   std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+   std::cout << "Done\n";
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+   }
+   return 0;
+}
\ No newline at end of file
diff --git a/test/test_beta_dist_pdf_float.cu b/test/test_beta_dist_pdf_float.cu
new file mode 100644
index 0000000000..97dd606f2f
--- /dev/null
+++ b/test/test_beta_dist_pdf_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+   using std::cos;
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+   if (i < numElements)
+   {
+      out[i] = pdf(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+   try {
+
+   // Error code to check return values for CUDA calls
+   cudaError_t err = cudaSuccess;
+
+   // Print the vector length to be used, and compute its size
+   int numElements = 50000;
+   std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+   // Allocate the managed input vector A
+   cuda_managed_ptr<float_type> input_vector1(numElements);
+
+   // Allocate the managed output vector C
+   cuda_managed_ptr<float_type> output_vector(numElements);
+
+   boost::random::mt19937 gen;
+   boost::random::uniform_real_distribution<float_type> dist;
+   // Initialize the input vectors
+   for (int i = 0; i < numElements; ++i)
+   {
+      input_vector1[i] = dist(gen);
+   }
+
+   // Launch the test CUDA kernel
+   int threadsPerBlock = 256;
+   int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+   std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+   watch w;
+   cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+   cudaDeviceSynchronize();
+   std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+   err = cudaGetLastError();
+   if (err != cudaSuccess)
+   {
+      std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+      return EXIT_FAILURE;
+   }
+
+   // Verify that the result vector is correct
+   std::vector<float_type> results;
+   results.reserve(numElements);
+   w.reset();
+   for(int i = 0; i < numElements; ++i)
+      results.push_back(pdf(boost::math::beta_distribution<float_type>(), input_vector1[i]));
+   double t = w.elapsed();
+   // check the results
+   for(int i = 0; i < numElements; ++i)
+   {
+      if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+      {
+         std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+         std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+         return EXIT_FAILURE;
+      }
+   }
+
+   std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+   std::cout << "Done\n";
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+   }
+   return 0;
+}
\ No newline at end of file
diff --git a/test/test_beta_dist_pdf_nvrtc_double.cpp b/test/test_beta_dist_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..c9870e2ce4
--- /dev/null
+++ b/test/test_beta_dist_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/beta.hpp>
+extern "C" __global__
+void test_beta_dist_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+   if (i < numElements)
+   {
+      out[i] = pdf(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+   if (result != cudaSuccess)
+   {
+      std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+   if (result != CUDA_SUCCESS)
+   {
+      const char* errorStr;
+      cuGetErrorString(result, &errorStr);
+      std::cerr << msg << ": " << errorStr << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+   if (result != NVRTC_SUCCESS)
+   {
+      std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+int main()
+{
+   try
+   {
+      // Initialize CUDA driver API
+      checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+      // Create CUDA context
+      CUcontext context;
+      CUdevice device;
+      checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+      checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+      nvrtcProgram prog;
+      nvrtcResult res;
+
+      res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_dist_kernel.cu", 0, nullptr, nullptr);
+      checkNVRTCError(res, "Failed to create NVRTC program");
+
+      nvrtcAddNameExpression(prog, "test_beta_dist_kernel");
+
+      #ifdef BOOST_MATH_NVRTC_CI_RUN
+      const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #else
+      const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #endif
+
+      // Compile the program
+      res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+      if (res != NVRTC_SUCCESS)
+      {
+         size_t log_size;
+         nvrtcGetProgramLogSize(prog, &log_size);
+         char* log = new char[log_size];
+         nvrtcGetProgramLog(prog, log);
+         std::cerr << "Compilation failed:\n" << log << std::endl;
+         delete[] log;
+         exit(EXIT_FAILURE);
+      }
+
+      // Get PTX from the program
+      size_t ptx_size;
+      nvrtcGetPTXSize(prog, &ptx_size);
+      char* ptx = new char[ptx_size];
+      nvrtcGetPTX(prog, ptx);
+
+      // Load PTX into CUDA module
+      CUmodule module;
+      CUfunction kernel;
+      checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+      checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_dist_kernel"), "Failed to get kernel function");
+
+      int numElements = 5000;
+      float_type *h_in1, *h_in2, *h_out;
+      float_type *d_in1, *d_in2, *d_out;
+
+      // Allocate memory on the host
+      h_in1 = new float_type[numElements];
+      h_in2 = new float_type[numElements];
+      h_out = new float_type[numElements];
+
+      // Initialize input arrays
+      std::mt19937_64 rng(42);
+      std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+      for (int i = 0; i < numElements; ++i)
+      {
+         h_in1[i] = static_cast<float_type>(dist(rng));
+         h_in2[i] = static_cast<float_type>(dist(rng));
+      }
+
+      checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+      checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+      checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+      checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+      checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+      int blockSize = 256;
+      int numBlocks = (numElements + blockSize - 1) / blockSize;
+      void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+      checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+      checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+      // Verify Result
+      for (int i = 0; i < numElements; ++i)
+      {
+         auto res = pdf(boost::math::beta_distribution<float_type>(), h_in1[i]);
+
+         if (boost::math::isfinite(res))
+         {
+            if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+            {
+               std::cout << "error at line: " << i
+                         << "\nParallel: " << h_out[i]
+                         << "\n  Serial: " << res
+                         << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+            }
+         }
+      }
+
+      cudaFree(d_in1);
+      cudaFree(d_in2);
+      cudaFree(d_out);
+      delete[] h_in1;
+      delete[] h_in2;
+      delete[] h_out;
+
+      nvrtcDestroyProgram(&prog);
+      delete[] ptx;
+
+      cuCtxDestroy(context);
+
+      std::cout << "Kernel executed successfully." << std::endl;
+      return 0;
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+      return EXIT_FAILURE;
+   }
+}
diff --git a/test/test_beta_dist_pdf_nvrtc_float.cpp b/test/test_beta_dist_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..0b4fd83488
--- /dev/null
+++ b/test/test_beta_dist_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/beta.hpp>
+extern "C" __global__
+void test_beta_dist_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+   if (i < numElements)
+   {
+      out[i] = pdf(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+   if (result != cudaSuccess)
+   {
+      std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+   if (result != CUDA_SUCCESS)
+   {
+      const char* errorStr;
+      cuGetErrorString(result, &errorStr);
+      std::cerr << msg << ": " << errorStr << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+   if (result != NVRTC_SUCCESS)
+   {
+      std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+int main()
+{
+   try
+   {
+      // Initialize CUDA driver API
+      checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+      // Create CUDA context
+      CUcontext context;
+      CUdevice device;
+      checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+      checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+      nvrtcProgram prog;
+      nvrtcResult res;
+
+      res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_dist_kernel.cu", 0, nullptr, nullptr);
+      checkNVRTCError(res, "Failed to create NVRTC program");
+
+      nvrtcAddNameExpression(prog, "test_beta_dist_kernel");
+
+      #ifdef BOOST_MATH_NVRTC_CI_RUN
+      const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #else
+      const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #endif
+
+      // Compile the program
+      res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+      if (res != NVRTC_SUCCESS)
+      {
+         size_t log_size;
+         nvrtcGetProgramLogSize(prog, &log_size);
+         char* log = new char[log_size];
+         nvrtcGetProgramLog(prog, log);
+         std::cerr << "Compilation failed:\n" << log << std::endl;
+         delete[] log;
+         exit(EXIT_FAILURE);
+      }
+
+      // Get PTX from the program
+      size_t ptx_size;
+      nvrtcGetPTXSize(prog, &ptx_size);
+      char* ptx = new char[ptx_size];
+      nvrtcGetPTX(prog, ptx);
+
+      // Load PTX into CUDA module
+      CUmodule module;
+      CUfunction kernel;
+      checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+      checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_dist_kernel"), "Failed to get kernel function");
+
+      int numElements = 5000;
+      float_type *h_in1, *h_in2, *h_out;
+      float_type *d_in1, *d_in2, *d_out;
+
+      // Allocate memory on the host
+      h_in1 = new float_type[numElements];
+      h_in2 = new float_type[numElements];
+      h_out = new float_type[numElements];
+
+      // Initialize input arrays
+      std::mt19937_64 rng(42);
+      std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+      for (int i = 0; i < numElements; ++i)
+      {
+         h_in1[i] = static_cast<float_type>(dist(rng));
+         h_in2[i] = static_cast<float_type>(dist(rng));
+      }
+
+      checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+      checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+      checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+      checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+      checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+      int blockSize = 256;
+      int numBlocks = (numElements + blockSize - 1) / blockSize;
+      void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+      checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+      checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+      // Verify Result
+      for (int i = 0; i < numElements; ++i)
+      {
+         auto res = pdf(boost::math::beta_distribution<float_type>(), h_in1[i]);
+
+         if (boost::math::isfinite(res))
+         {
+            if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+            {
+               std::cout << "error at line: " << i
+                         << "\nParallel: " << h_out[i]
+                         << "\n  Serial: " << res
+                         << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+            }
+         }
+      }
+
+      cudaFree(d_in1);
+      cudaFree(d_in2);
+      cudaFree(d_out);
+      delete[] h_in1;
+      delete[] h_in2;
+      delete[] h_out;
+
+      nvrtcDestroyProgram(&prog);
+      delete[] ptx;
+
+      cuCtxDestroy(context);
+
+      std::cout << "Kernel executed successfully." << std::endl;
+      return 0;
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+      return EXIT_FAILURE;
+   }
+}
diff --git a/test/test_beta_dist_quan_double.cu b/test/test_beta_dist_quan_double.cu
new file mode 100644
index 0000000000..a6b842e8ef
--- /dev/null
+++ b/test/test_beta_dist_quan_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+   using std::cos;
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+   if (i < numElements)
+   {
+      out[i] = quantile(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+   try {
+
+   // Error code to check return values for CUDA calls
+   cudaError_t err = cudaSuccess;
+
+   // Print the vector length to be used, and compute its size
+   int numElements = 50000;
+   std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+   // Allocate the managed input vector A
+   cuda_managed_ptr<float_type> input_vector1(numElements);
+
+   // Allocate the managed output vector C
+   cuda_managed_ptr<float_type> output_vector(numElements);
+
+   boost::random::mt19937 gen;
+   boost::random::uniform_real_distribution<float_type> dist;
+   // Initialize the input vectors
+   for (int i = 0; i < numElements; ++i)
+   {
+      input_vector1[i] = dist(gen);
+   }
+
+   // Launch the test CUDA kernel
+   int threadsPerBlock = 256;
+   int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+   std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+   watch w;
+   cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+   cudaDeviceSynchronize();
+   std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+   err = cudaGetLastError();
+   if (err != cudaSuccess)
+   {
+      std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+      return EXIT_FAILURE;
+   }
+
+   // Verify that the result vector is correct
+   std::vector<float_type> results;
+   results.reserve(numElements);
+   w.reset();
+   for(int i = 0; i < numElements; ++i)
+      results.push_back(quantile(boost::math::beta_distribution<float_type>(), input_vector1[i]));
+   double t = w.elapsed();
+   // check the results
+   for(int i = 0; i < numElements; ++i)
+   {
+      if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+      {
+         std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+         std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+         return EXIT_FAILURE;
+      }
+   }
+
+   std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+   std::cout << "Done\n";
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+   }
+   return 0;
+}
\ No newline at end of file
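Since quantile is the inverse of the CDF, uniform [0,1) variates are exactly the right inputs for the quantile kernels. A quick host-side round-trip identity, purely illustrative and not part of the test files:

    #include <boost/math/distributions/beta.hpp>

    boost::math::beta_distribution<double> d;  // default alpha = beta = 1
    double x  = 0.25;
    double rt = quantile(d, cdf(d, x));        // rt == 0.25 up to rounding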
diff --git a/test/test_beta_dist_quan_float.cu b/test/test_beta_dist_quan_float.cu
new file mode 100644
index 0000000000..48a860f4c2
--- /dev/null
+++ b/test/test_beta_dist_quan_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+   using std::cos;
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+   if (i < numElements)
+   {
+      out[i] = quantile(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+   try {
+
+   // Error code to check return values for CUDA calls
+   cudaError_t err = cudaSuccess;
+
+   // Print the vector length to be used, and compute its size
+   int numElements = 50000;
+   std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+   // Allocate the managed input vector A
+   cuda_managed_ptr<float_type> input_vector1(numElements);
+
+   // Allocate the managed output vector C
+   cuda_managed_ptr<float_type> output_vector(numElements);
+
+   boost::random::mt19937 gen;
+   boost::random::uniform_real_distribution<float_type> dist;
+   // Initialize the input vectors
+   for (int i = 0; i < numElements; ++i)
+   {
+      input_vector1[i] = dist(gen);
+   }
+
+   // Launch the test CUDA kernel
+   int threadsPerBlock = 256;
+   int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+   std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+   watch w;
+   cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+   cudaDeviceSynchronize();
+   std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+   err = cudaGetLastError();
+   if (err != cudaSuccess)
+   {
+      std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+      return EXIT_FAILURE;
+   }
+
+   // Verify that the result vector is correct
+   std::vector<float_type> results;
+   results.reserve(numElements);
+   w.reset();
+   for(int i = 0; i < numElements; ++i)
+      results.push_back(quantile(boost::math::beta_distribution<float_type>(), input_vector1[i]));
+   double t = w.elapsed();
+   // check the results
+   for(int i = 0; i < numElements; ++i)
+   {
+      if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+      {
+         std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+         std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+         return EXIT_FAILURE;
+      }
+   }
+
+   std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+   std::cout << "Done\n";
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+   }
+   return 0;
+}
\ No newline at end of file
diff --git a/test/test_beta_dist_quan_nvrtc_double.cpp b/test/test_beta_dist_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..9726bf019e
--- /dev/null
+++ b/test/test_beta_dist_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/beta.hpp>
+extern "C" __global__
+void test_beta_dist_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+   if (i < numElements)
+   {
+      out[i] = quantile(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+   if (result != cudaSuccess)
+   {
+      std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+   if (result != CUDA_SUCCESS)
+   {
+      const char* errorStr;
+      cuGetErrorString(result, &errorStr);
+      std::cerr << msg << ": " << errorStr << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+   if (result != NVRTC_SUCCESS)
+   {
+      std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+int main()
+{
+   try
+   {
+      // Initialize CUDA driver API
+      checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+      // Create CUDA context
+      CUcontext context;
+      CUdevice device;
+      checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+      checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+      nvrtcProgram prog;
+      nvrtcResult res;
+
+      res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_dist_kernel.cu", 0, nullptr, nullptr);
+      checkNVRTCError(res, "Failed to create NVRTC program");
+
+      nvrtcAddNameExpression(prog, "test_beta_dist_kernel");
+
+      #ifdef BOOST_MATH_NVRTC_CI_RUN
+      const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #else
+      const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #endif
+
+      // Compile the program
+      res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+      if (res != NVRTC_SUCCESS)
+      {
+         size_t log_size;
+         nvrtcGetProgramLogSize(prog, &log_size);
+         char* log = new char[log_size];
+         nvrtcGetProgramLog(prog, log);
+         std::cerr << "Compilation failed:\n" << log << std::endl;
+         delete[] log;
+         exit(EXIT_FAILURE);
+      }
+
+      // Get PTX from the program
+      size_t ptx_size;
+      nvrtcGetPTXSize(prog, &ptx_size);
+      char* ptx = new char[ptx_size];
+      nvrtcGetPTX(prog, ptx);
+
+      // Load PTX into CUDA module
+      CUmodule module;
+      CUfunction kernel;
+      checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+      checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_dist_kernel"), "Failed to get kernel function");
+
+      int numElements = 5000;
+      float_type *h_in1, *h_in2, *h_out;
+      float_type *d_in1, *d_in2, *d_out;
+
+      // Allocate memory on the host
+      h_in1 = new float_type[numElements];
+      h_in2 = new float_type[numElements];
+      h_out = new float_type[numElements];
+
+      // Initialize input arrays
+      std::mt19937_64 rng(42);
+      std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+      for (int i = 0; i < numElements; ++i)
+      {
+         h_in1[i] = static_cast<float_type>(dist(rng));
+         h_in2[i] = static_cast<float_type>(dist(rng));
+      }
+
+      checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+      checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+      checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+      checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+      checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+      int blockSize = 256;
+      int numBlocks = (numElements + blockSize - 1) / blockSize;
+      void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+      checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+      checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+      // Verify Result
+      for (int i = 0; i < numElements; ++i)
+      {
+         auto res = quantile(boost::math::beta_distribution<float_type>(), h_in1[i]);
+
+         if (boost::math::isfinite(res))
+         {
+            if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+            {
+               std::cout << "error at line: " << i
+                         << "\nParallel: " << h_out[i]
+                         << "\n  Serial: " << res
+                         << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+            }
+         }
+      }
+
+      cudaFree(d_in1);
+      cudaFree(d_in2);
+      cudaFree(d_out);
+      delete[] h_in1;
+      delete[] h_in2;
+      delete[] h_out;
+
+      nvrtcDestroyProgram(&prog);
+      delete[] ptx;
+
+      cuCtxDestroy(context);
+
+      std::cout << "Kernel executed successfully." << std::endl;
+      return 0;
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+      return EXIT_FAILURE;
+   }
+}
diff --git a/test/test_beta_dist_quan_nvrtc_float.cpp b/test/test_beta_dist_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..d2476cb2ac
--- /dev/null
+++ b/test/test_beta_dist_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_beta_dist_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::beta_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_dist_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_beta_dist_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_dist_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::beta_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_beta_double.cu b/test/test_beta_double.cu new file mode 100644 index 0000000000..cd58601584 --- /dev/null +++ b/test/test_beta_double.cu @@ -0,0 +1,132 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type * in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::beta(in1[i], in2[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "beta_med_data.ipp" +#include "beta_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2; + + for(unsigned i = 0; i < beta_med_data.size(); ++i) + { + v1.push_back(beta_med_data[i][0]); + v2.push_back(beta_med_data[i][1]); + } + for(unsigned i = 0; i < beta_small_data.size(); ++i) + { + v1.push_back(beta_small_data[i][0]); + v2.push_back(beta_small_data[i][1]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::beta(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + + diff --git a/test/test_beta_float.cu b/test/test_beta_float.cu new file mode 100644 index 0000000000..c4c078f373 --- /dev/null +++ b/test/test_beta_float.cu @@ -0,0 +1,130 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type * in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::beta(in1[i], in2[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "beta_med_data.ipp" +#include "beta_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2; + + for(unsigned i = 0; i < beta_med_data.size(); ++i) + { + v1.push_back(beta_med_data[i][0]); + v2.push_back(beta_med_data[i][1]); + } + for(unsigned i = 0; i < beta_small_data.size(); ++i) + { + v1.push_back(beta_small_data[i][0]); + v2.push_back(beta_small_data[i][1]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::beta(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/test_beta_nvrtc_double.cpp b/test/test_beta_nvrtc_double.cpp new file mode 100644 index 0000000000..fdc502a195 --- /dev/null +++ b/test/test_beta_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_beta_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::beta(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_beta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", 
"-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::beta(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_beta_nvrtc_float.cpp b/test/test_beta_nvrtc_float.cpp new file mode 100644 index 0000000000..d403d33155 --- /dev/null +++ b/test/test_beta_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_beta_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::beta(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_beta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); 
+ } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::beta(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_beta_simple.cpp b/test/test_beta_simple.cpp new file mode 100644 index 0000000000..436b14ab46 --- /dev/null +++ b/test/test_beta_simple.cpp @@ -0,0 +1,38 @@ +// Copyright John Maddock 2006. +// Copyright Paul A. Bristow 2007, 2009 +// Copyright Matt Borland 2024 +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include "math_unit_test.hpp" + +template +void test_spots(T) +{ + // + // Basic sanity checks, tolerance is 20 epsilon expressed as a percentage: + // + T tolerance = 20; + T small = boost::math::tools::epsilon() / 1024; + CHECK_ULP_CLOSE(::boost::math::beta(static_cast(1), static_cast(1)), static_cast(1), tolerance); + CHECK_ULP_CLOSE(::boost::math::beta(static_cast(1), static_cast(4)), static_cast(0.25), tolerance); + CHECK_ULP_CLOSE(::boost::math::beta(static_cast(4), static_cast(1)), static_cast(0.25), tolerance); + CHECK_ULP_CLOSE(::boost::math::beta(small, static_cast(4)), 1/small, tolerance); + CHECK_ULP_CLOSE(::boost::math::beta(static_cast(4), small), 1/small, tolerance); + CHECK_ULP_CLOSE(::boost::math::beta(small, static_cast(4)), 1/small, tolerance); + CHECK_ULP_CLOSE(::boost::math::beta(static_cast(4), static_cast(20)), static_cast(0.00002823263692828910220214568040654997176736L), tolerance); +} + +int main() +{ + test_spots(0.0F); + test_spots(0.0); + + return boost::math::test::report_errors(); +} diff --git a/test/test_betac_double.cu b/test/test_betac_double.cu new file mode 100644 index 0000000000..8bb31d3219 --- /dev/null +++ b/test/test_betac_double.cu @@ -0,0 +1,146 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::betac(in1[i], in2[i], in3[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "beta_med_data.ipp" +#include "beta_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2, v3; + + for(unsigned i = 0; i < beta_med_data.size(); ++i) + { + v1.push_back(beta_med_data[i][0]); + v2.push_back(beta_med_data[i][1]); + v3.push_back(beta_med_data[i][2]); + } + for(unsigned i = 0; i < beta_small_data.size(); ++i) + { + v1.push_back(beta_small_data[i][0]); + v2.push_back(beta_small_data[i][1]); + v3.push_back(beta_small_data[i][2]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr input_vector3(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the 
input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + input_vector3[i] = v3[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::betac(input_vector1[i], input_vector2[i], input_vector3[i])); + double t = w.elapsed(); + bool failed = false; + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::isfinite(output_vector[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + failed = true; + } + } + } + + if (failed) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + + diff --git a/test/test_betac_float.cu b/test/test_betac_float.cu new file mode 100644 index 0000000000..7070c567cc --- /dev/null +++ b/test/test_betac_float.cu @@ -0,0 +1,146 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::betac(in1[i], in2[i], in3[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "beta_med_data.ipp" +#include "beta_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2, v3; + + for(unsigned i = 0; i < beta_med_data.size(); ++i) + { + v1.push_back(beta_med_data[i][0]); + v2.push_back(beta_med_data[i][1]); + v3.push_back(beta_med_data[i][2]); + } + for(unsigned i = 0; i < beta_small_data.size(); ++i) + { + v1.push_back(beta_small_data[i][0]); + v2.push_back(beta_small_data[i][1]); + v3.push_back(beta_small_data[i][2]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr input_vector3(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + input_vector3[i] = v3[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::betac(input_vector1[i], input_vector2[i], input_vector3[i])); + double t = w.elapsed(); + bool failed = false; + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::isfinite(output_vector[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + failed = true; + } + } + } + + if (failed) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + + diff --git a/test/test_betac_nvrtc_double.cpp b/test/test_betac_nvrtc_double.cpp new file mode 100644 index 0000000000..0667cfe0d4 --- /dev/null +++ b/test/test_betac_nvrtc_double.cpp @@ -0,0 +1,196 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_beta_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::betac(in1[i], in2[i], in3[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_beta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t 
ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, *d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + h_in3[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::betac(h_in1[i], h_in2[i], h_in3[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_betac_nvrtc_float.cpp b/test/test_betac_nvrtc_float.cpp new file mode 100644 index 0000000000..0667cfe0d4 --- /dev/null +++ b/test/test_betac_nvrtc_float.cpp @@ -0,0 +1,196 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_beta_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::betac(in1[i], in2[i], in3[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_beta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, *d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + h_in3[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::betac(h_in1[i], h_in2[i], h_in3[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_binomial.cpp b/test/test_binomial.cpp index 8aec49e4f8..ef7f171723 100644 --- a/test/test_binomial.cpp +++ b/test/test_binomial.cpp @@ -27,7 +27,6 @@ // Enable C++ Exceptions Yes With SEH Exceptions (/EHa) prevents warning 4535. 
#endif -#include #include // for real_concept using ::boost::math::concepts::real_concept; @@ -522,57 +521,57 @@ void test_spots(RealType T) binomial_distribution(static_cast(0), static_cast(0.25)), static_cast(0)), static_cast(1) ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( pdf( binomial_distribution(static_cast(-1), static_cast(0.25)), static_cast(0)), std::domain_error ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( pdf( binomial_distribution(static_cast(8), static_cast(-0.25)), static_cast(0)), std::domain_error ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( pdf( binomial_distribution(static_cast(8), static_cast(1.25)), static_cast(0)), std::domain_error ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( pdf( binomial_distribution(static_cast(8), static_cast(0.25)), static_cast(-1)), std::domain_error ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( pdf( binomial_distribution(static_cast(8), static_cast(0.25)), static_cast(9)), std::domain_error ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( cdf( binomial_distribution(static_cast(8), static_cast(0.25)), static_cast(-1)), std::domain_error ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( cdf( binomial_distribution(static_cast(8), static_cast(0.25)), static_cast(9)), std::domain_error ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( cdf( binomial_distribution(static_cast(8), static_cast(-0.25)), static_cast(0)), std::domain_error ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( cdf( binomial_distribution(static_cast(8), static_cast(1.25)), static_cast(0)), std::domain_error ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( quantile( binomial_distribution(static_cast(8), static_cast(-0.25)), static_cast(0)), std::domain_error ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( quantile( binomial_distribution(static_cast(8), static_cast(1.25)), static_cast(0)), std::domain_error diff --git a/test/test_cauchy.cpp b/test/test_cauchy.cpp index 002690e5f8..f5893264a6 100644 --- a/test/test_cauchy.cpp +++ b/test/test_cauchy.cpp @@ -18,17 +18,16 @@ // #define BOOST_MATH_ASSERT_UNDEFINED_POLICY false // To compile even if Cauchy mean is used. -#include #include // for real_concept #include using boost::math::cauchy_distribution; -#include "test_out_of_range.hpp" - #define BOOST_TEST_MAIN #include // Boost.Test #include +#include "test_out_of_range.hpp" + #include using std::cout; using std::endl; @@ -38,11 +37,11 @@ void test_spots(RealType T) { // Check some bad parameters to construct the distribution, #ifndef BOOST_NO_EXCEPTIONS - BOOST_MATH_CHECK_THROW(boost::math::cauchy_distribution nbad1(0, 0), std::domain_error); // zero scale. - BOOST_MATH_CHECK_THROW(boost::math::cauchy_distribution nbad1(0, -1), std::domain_error); // negative scale (shape). + BOOST_CHECK_THROW(boost::math::cauchy_distribution nbad1(0, 0), std::domain_error); // zero scale. + BOOST_CHECK_THROW(boost::math::cauchy_distribution nbad1(0, -1), std::domain_error); // negative scale (shape). #else - BOOST_MATH_CHECK_THROW(boost::math::cauchy_distribution(0, 0), std::domain_error); // zero scale. - BOOST_MATH_CHECK_THROW(boost::math::cauchy_distribution(0, -1), std::domain_error); // negative scale (shape). + BOOST_CHECK_THROW(boost::math::cauchy_distribution(0, 0), std::domain_error); // zero scale. + BOOST_CHECK_THROW(boost::math::cauchy_distribution(0, -1), std::domain_error); // negative scale (shape). #endif cauchy_distribution C01; @@ -667,35 +666,35 @@ void test_spots(RealType T) // To compile even if Cauchy mean is used. 
// See policy reference, mathematically undefined function policies // - //BOOST_MATH_CHECK_THROW( + //BOOST_CHECK_THROW( // mean(dist), // std::domain_error); - //BOOST_MATH_CHECK_THROW( + //BOOST_CHECK_THROW( // variance(dist), // std::domain_error); - //BOOST_MATH_CHECK_THROW( + //BOOST_CHECK_THROW( // standard_deviation(dist), // std::domain_error); - //BOOST_MATH_CHECK_THROW( + //BOOST_CHECK_THROW( // kurtosis(dist), // std::domain_error); - //BOOST_MATH_CHECK_THROW( + //BOOST_CHECK_THROW( // kurtosis_excess(dist), // std::domain_error); - //BOOST_MATH_CHECK_THROW( + //BOOST_CHECK_THROW( // skewness(dist), // std::domain_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( quantile(dist, RealType(0.0)), std::overflow_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( quantile(dist, RealType(1.0)), std::overflow_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( quantile(complement(dist, RealType(0.0))), std::overflow_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( quantile(complement(dist, RealType(1.0))), std::overflow_error); @@ -705,7 +704,7 @@ void test_spots(RealType T) } // template void test_spots(RealType) -BOOST_AUTO_TEST_CASE( test_main ) +BOOST_AUTO_TEST_CASE(test_main) { BOOST_MATH_CONTROL_FP; // Check that can generate cauchy distribution using the two convenience methods: diff --git a/test/test_cauchy_cdf_double.cu b/test/test_cauchy_cdf_double.cu new file mode 100644 index 0000000000..526744ba1f --- /dev/null +++ b/test/test_cauchy_cdf_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::cauchy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(-10000, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != 
cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::cauchy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_cauchy_cdf_float.cu b/test/test_cauchy_cdf_float.cu new file mode 100644 index 0000000000..526744ba1f --- /dev/null +++ b/test/test_cauchy_cdf_float.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::cauchy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(-10000, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(cdf(boost::math::cauchy_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_cauchy_cdf_nvrtc_double.cpp b/test/test_cauchy_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..f9b16b6e0e
--- /dev/null
+++ b/test/test_cauchy_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/cauchy.hpp>
+extern "C" __global__
+void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cauchy_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
"--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::cauchy_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_cauchy_cdf_nvrtc_float.cpp b/test/test_cauchy_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..0870900ae4 --- /dev/null +++ b/test/test_cauchy_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. 
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/cauchy.hpp>
+extern "C" __global__
+void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cauchy_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::cauchy_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cauchy_pdf_double.cu b/test/test_cauchy_pdf_double.cu
new file mode 100644
index 0000000000..62398c31ed
--- /dev/null
+++ b/test/test_cauchy_pdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(pdf(boost::math::cauchy_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_cauchy_pdf_float.cu b/test/test_cauchy_pdf_float.cu
new file mode 100644
index 0000000000..aff3369b83
--- /dev/null
+++ b/test/test_cauchy_pdf_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(pdf(boost::math::cauchy_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_cauchy_pdf_nvrtc_double.cpp b/test/test_cauchy_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..24e2808c93
--- /dev/null
+++ b/test/test_cauchy_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/cauchy.hpp>
+extern "C" __global__
+void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cauchy_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::cauchy_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cauchy_pdf_nvrtc_float.cpp b/test/test_cauchy_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..605d257831
--- /dev/null
+++ b/test/test_cauchy_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/cauchy.hpp>
+extern "C" __global__
+void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cauchy_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::cauchy_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cauchy_quan_double.cu b/test/test_cauchy_quan_double.cu
new file mode 100644
index 0000000000..0fcaaafe7c
--- /dev/null
+++ b/test/test_cauchy_quan_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(quantile(boost::math::cauchy_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_cauchy_quan_float.cu b/test/test_cauchy_quan_float.cu
new file mode 100644
index 0000000000..9c04c5b12a
--- /dev/null
+++ b/test/test_cauchy_quan_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(quantile(boost::math::cauchy_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_cauchy_quan_nvrtc_double.cpp b/test/test_cauchy_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..08fa01c4f9
--- /dev/null
+++ b/test/test_cauchy_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/cauchy.hpp>
+extern "C" __global__
+void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cauchy_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::cauchy_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cauchy_quan_nvrtc_float.cpp b/test/test_cauchy_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..6d63d1c2d6
--- /dev/null
+++ b/test/test_cauchy_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/cauchy.hpp>
+extern "C" __global__
+void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cauchy_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::cauchy_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cauchy_range_support_double.cu b/test/test_cauchy_range_support_double.cu
new file mode 100644
index 0000000000..3a42c1bd30
--- /dev/null
+++ b/test/test_cauchy_range_support_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type* in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = support(boost::math::cauchy_distribution<float_type>(in1[i])).second;
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(support(boost::math::cauchy_distribution<float_type>(input_vector1[i])).second);
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/test/test_cauchy_range_support_float.cu b/test/test_cauchy_range_support_float.cu
new file mode 100644
index 0000000000..e713736e60
--- /dev/null
+++ b/test/test_cauchy_range_support_float.cu
@@ -0,0 +1,111 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type* in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = range(boost::math::cauchy_distribution<float_type>(in1[i])).first;
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(range(boost::math::cauchy_distribution<float_type>(input_vector1[i])).first);
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            std::cerr << "Device got: " << output_vector[i] << ", and serial got: " << results[i] << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/test/test_cbrt.cpp b/test/test_cbrt.cpp
index 8b36a765d0..6abb9bd885 100644
--- a/test/test_cbrt.cpp
+++ b/test/test_cbrt.cpp
@@ -9,7 +9,10 @@
 #  pragma warning (disable : 4224)
 #endif
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp> // include /libs/math/src/
+#endif
+
 #include "test_cbrt.hpp"
 #include <boost/math/tr1.hpp> // Added to avoid link failure missing cbrt variants.
diff --git a/test/test_cbrt.hpp b/test/test_cbrt.hpp
index f606a58407..77e4aed516 100644
--- a/test/test_cbrt.hpp
+++ b/test/test_cbrt.hpp
@@ -1,5 +1,6 @@
 // Copyright John Maddock 2006.
 // Copyright Paul A. Bristow 2007, 2009
+// Copyright Matt Borland 2024
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -9,7 +10,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -93,7 +93,9 @@ void test_cbrt(T, const char* name)
    }
    BOOST_IF_CONSTEXPR(std::numeric_limits<T>::has_quiet_NaN)
    {
+      #ifndef BOOST_MATH_NO_EXCEPTIONS
       BOOST_CHECK_THROW(boost::math::cbrt(std::numeric_limits<T>::quiet_NaN()), std::domain_error);
+      #endif
    }
 }
diff --git a/test/test_cbrt_double.cu b/test/test_cbrt_double.cu
new file mode 100644
index 0000000000..cc2c326850
--- /dev/null
+++ b/test/test_cbrt_double.cu
@@ -0,0 +1,99 @@
+
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/cbrt.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cbrt(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::cbrt(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cbrt_float.cu b/test/test_cbrt_float.cu
new file mode 100644
index 0000000000..a4e98cce8d
--- /dev/null
+++ b/test/test_cbrt_float.cu
@@ -0,0 +1,99 @@
+
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type* in, float_type* out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cbrt(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the test CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::cbrt(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cbrt_nvrtc_double.cpp b/test/test_cbrt_nvrtc_double.cpp
new file mode 100644
index 0000000000..717d116d68
--- /dev/null
+++ b/test/test_cbrt_nvrtc_double.cpp
@@ -0,0 +1,186 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/cbrt.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/cbrt.hpp>
+extern "C" __global__
+void test_cbrt_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cbrt(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cbrt_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cbrt_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cbrt_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device
memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::cbrt(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_cbrt_nvrtc_float.cpp b/test/test_cbrt_nvrtc_float.cpp new file mode 100644 index 0000000000..a595cb8705 --- /dev/null +++ b/test/test_cbrt_nvrtc_float.cpp @@ -0,0 +1,186 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +extern "C" __global__ +void test_cbrt_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::cbrt(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cbrt_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cbrt_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cbrt_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory 
for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::cbrt(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_changesign_double.cu b/test/test_changesign_double.cu new file mode 100644 index 0000000000..bfb2ade1e2 --- /dev/null +++ b/test/test_changesign_double.cu @@ -0,0 +1,111 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <limits>
+#include <boost/math/special_functions/sign.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type* in, float_type* out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::changesign(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vector with finite values, zeros, and infinities
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+        switch(i % 55)
+        {
+        case 1:
+            h_A[i] = 0;
+            break;
+        case 2:
+            h_A[i] = std::numeric_limits<float_type>::infinity();
+            break;
+        case 3:
+            h_A[i] = -std::numeric_limits<float_type>::infinity();
+            break;
+        }
+        // negate every other element so negative inputs are exercised too
+        if (i % 2)
+            h_A[i] = -h_A[i];
+    }
+
+    // Launch the test CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::changesign(h_A[i]));
+    double t = w.elapsed();
+    // check the results
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (h_C[i] != results[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_changesign_float.cu b/test/test_changesign_float.cu
new file mode 100644
index 0000000000..d7e1764bdf
--- /dev/null
+++ b/test/test_changesign_float.cu
@@ -0,0 +1,111 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <limits>
+#include <boost/math/special_functions/sign.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type* in, float_type* out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::changesign(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vector with finite values, zeros, and infinities
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+        switch(i % 55)
+        {
+        case 1:
+            h_A[i] = 0;
+            break;
+        case 2:
+            h_A[i] = std::numeric_limits<float_type>::infinity();
+            break;
+        case 3:
+            h_A[i] = -std::numeric_limits<float_type>::infinity();
+            break;
+        }
+        // negate every other element so negative inputs are exercised too
+        if (i % 2)
+            h_A[i] = -h_A[i];
+    }
+
+    // Launch the test CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::changesign(h_A[i]));
+    double t = w.elapsed();
+    // check the results
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (h_C[i] != results[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_chi_squared.cpp b/test/test_chi_squared.cpp
index cc7747a6c0..bfd4b5f3a2 100644
--- a/test/test_chi_squared.cpp
+++ b/test/test_chi_squared.cpp
@@ -16,9 +16,13 @@
 # pragma warning(disable: 4127) // conditional expression is constant
 #endif
 
-#include // for real_concept
+#include <boost/math/tools/config.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
 using ::boost::math::concepts::real_concept;
+#endif
 
 #include <boost/math/distributions/chi_squared.hpp> // for chi_squared_distribution
 #include // for chi_squared_distribution
diff --git a/test/test_chi_squared_cdf_double.cu b/test/test_chi_squared_cdf_double.cu
new file mode 100644
index 0000000000..c2475883b9
--- /dev/null
+++ b/test/test_chi_squared_cdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::chi_squared_distribution(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::chi_squared_distribution(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_chi_squared_cdf_float.cu b/test/test_chi_squared_cdf_float.cu new file mode 100644 index 0000000000..07dce0d067 --- /dev/null +++ b/test/test_chi_squared_cdf_float.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::chi_squared_distribution(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::chi_squared_distribution(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_chi_squared_cdf_nvrtc_double.cpp b/test/test_chi_squared_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..0ad459fa67 --- /dev/null +++ b/test/test_chi_squared_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::chi_squared_distribution(1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_chi_squared_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_chi_squared_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_chi_squared_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::chi_squared_distribution(1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_chi_squared_cdf_nvrtc_float.cpp b/test/test_chi_squared_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..1b26c5d6f2 --- /dev/null +++ b/test/test_chi_squared_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::chi_squared_distribution(1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_chi_squared_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_chi_squared_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_chi_squared_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::chi_squared_distribution(1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_chi_squared_pdf_double.cu b/test/test_chi_squared_pdf_double.cu new file mode 100644 index 0000000000..30edafd050 --- /dev/null +++ b/test/test_chi_squared_pdf_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::chi_squared_distribution(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::chi_squared_distribution(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_chi_squared_pdf_float.cu b/test/test_chi_squared_pdf_float.cu new file mode 100644 index 0000000000..9b205182ba --- /dev/null +++ b/test/test_chi_squared_pdf_float.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::chi_squared_distribution(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::chi_squared_distribution(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_chi_squared_pdf_nvrtc_double.cpp b/test/test_chi_squared_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..18d14a4b0e --- /dev/null +++ b/test/test_chi_squared_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::chi_squared_distribution(1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_chi_squared_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_chi_squared_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_chi_squared_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::chi_squared_distribution(1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_chi_squared_pdf_nvrtc_float.cpp b/test/test_chi_squared_pdf_nvrtc_float.cpp new file mode 100644 index 0000000000..754cbf7fba --- /dev/null +++ b/test/test_chi_squared_pdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::chi_squared_distribution(1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_chi_squared_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_chi_squared_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_chi_squared_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::chi_squared_distribution(1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_chi_squared_quan_double.cu b/test/test_chi_squared_quan_double.cu new file mode 100644 index 0000000000..3fae7d966f --- /dev/null +++ b/test/test_chi_squared_quan_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <boost/math/distributions/chi_squared.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::chi_squared_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::chi_squared_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_chi_squared_quan_float.cu b/test/test_chi_squared_quan_float.cu
new file mode 100644
index 0000000000..7a717530e1
--- /dev/null
+++ b/test/test_chi_squared_quan_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <boost/math/distributions/chi_squared.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::chi_squared_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::chi_squared_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_chi_squared_quan_nvrtc_double.cpp b/test/test_chi_squared_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..69b15b6cfd
--- /dev/null
+++ b/test/test_chi_squared_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/chi_squared.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/chi_squared.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::chi_squared_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_chi_squared_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_chi_squared_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_chi_squared_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
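+        // Inputs are drawn from [0, 1), the valid probability domain for quantile()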
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::chi_squared_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_chi_squared_quan_nvrtc_float.cpp b/test/test_chi_squared_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..d6e1b2a9b5
--- /dev/null
+++ b/test/test_chi_squared_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/chi_squared.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/chi_squared.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::chi_squared_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_chi_squared_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_chi_squared_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_chi_squared_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
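+        // Same fixed-seed inputs as the double variant; only float_type changes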
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::chi_squared_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cos_pi_double.cu b/test/test_cos_pi_double.cu
new file mode 100644
index 0000000000..5a66b25ce2
--- /dev/null
+++ b/test/test_cos_pi_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/cos_pi.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cos_pi(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::cos_pi(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cos_pi_float.cu b/test/test_cos_pi_float.cu
new file mode 100644
index 0000000000..6a04d8e046
--- /dev/null
+++ b/test/test_cos_pi_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/cos_pi.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cos_pi(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::cos_pi(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cos_pi_nvrtc_double.cpp b/test/test_cos_pi_nvrtc_double.cpp
new file mode 100644
index 0000000000..459524bbed
--- /dev/null
+++ b/test/test_cos_pi_nvrtc_double.cpp
@@ -0,0 +1,186 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <boost/math/special_functions/cos_pi.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/cos_pi.hpp>
+extern "C" __global__
+void test_cos_pi_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cos_pi(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cos_pi_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cos_pi_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cos_pi_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
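+        // The kernel ignores its second argument; d_in2/h_in2 exist only so every test kernel shares one signature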
device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::cos_pi(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_cos_pi_nvrtc_float.cpp b/test/test_cos_pi_nvrtc_float.cpp new file mode 100644 index 0000000000..2f541e217d --- /dev/null +++ b/test/test_cos_pi_nvrtc_float.cpp @@ -0,0 +1,186 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <boost/math/special_functions/cos_pi.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/cos_pi.hpp>
+extern "C" __global__
+void test_cos_pi_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cos_pi(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cos_pi_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cos_pi_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cos_pi_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
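+        // Unlike the .cu tests, data moves through explicit cudaMalloc/cudaMemcpy rather than managed memory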
device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::cos_pi(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_cyl_bessel_i_double.cu b/test/test_cyl_bessel_i_double.cu new file mode 100644 index 0000000000..91a3ed8ebf --- /dev/null +++ b/test/test_cyl_bessel_i_double.cu @@ -0,0 +1,104 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_i(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::cyl_bessel_i(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_bessel_i_float.cu b/test/test_cyl_bessel_i_float.cu
new file mode 100644
index 0000000000..5aad1be88b
--- /dev/null
+++ b/test/test_cyl_bessel_i_float.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_i(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::cyl_bessel_i(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_bessel_i_nvrtc_double.cpp b/test/test_cyl_bessel_i_nvrtc_double.cpp
new file mode 100644
index 0000000000..50bfc0c790
--- /dev/null
+++ b/test/test_cyl_bessel_i_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_cyl_bessel_i_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_i(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_bessel_i_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cyl_bessel_i_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_bessel_i_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
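+        // Orders and arguments up to 1000 exercise the large-order and large-argument paths of cyl_bessel_i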
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::cyl_bessel_i(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cyl_bessel_i_nvrtc_float.cpp b/test/test_cyl_bessel_i_nvrtc_float.cpp
new file mode 100644
index 0000000000..c73992a27a
--- /dev/null
+++ b/test/test_cyl_bessel_i_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_cyl_bessel_i_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_i(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_bessel_i_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cyl_bessel_i_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_bessel_i_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
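+        // Same [0, 1000) input range as the double test, now at float precision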
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::cyl_bessel_i(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cyl_bessel_j_double.cu b/test/test_cyl_bessel_j_double.cu
new file mode 100644
index 0000000000..b5d93f1ddb
--- /dev/null
+++ b/test/test_cyl_bessel_j_double.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_j(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::cyl_bessel_j(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_bessel_j_float.cu b/test/test_cyl_bessel_j_float.cu
new file mode 100644
index 0000000000..3edc2a7c9c
--- /dev/null
+++ b/test/test_cyl_bessel_j_float.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_j(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::cyl_bessel_j(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_bessel_j_nvrtc_double.cpp b/test/test_cyl_bessel_j_nvrtc_double.cpp
new file mode 100644
index 0000000000..f74e112edd
--- /dev/null
+++ b/test/test_cyl_bessel_j_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cmath>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_cyl_bessel_j_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_j(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_bessel_j_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cyl_bessel_j_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_bessel_j_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::cyl_bessel_j(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cyl_bessel_j_nvrtc_float.cpp b/test/test_cyl_bessel_j_nvrtc_float.cpp
new file mode 100644
index 0000000000..e3d7928438
--- /dev/null
+++ b/test/test_cyl_bessel_j_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cmath>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_cyl_bessel_j_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_j(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_bessel_j_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cyl_bessel_j_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_bessel_j_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::cyl_bessel_j(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cyl_bessel_k_double.cu b/test/test_cyl_bessel_k_double.cu
new file mode 100644
index 0000000000..3dfd2bf388
--- /dev/null
+++ b/test/test_cyl_bessel_k_double.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/bessel.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_k(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::cyl_bessel_k(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_bessel_k_float.cu b/test/test_cyl_bessel_k_float.cu
new file mode 100644
index 0000000000..b874857a05
--- /dev/null
+++ b/test/test_cyl_bessel_k_float.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/bessel.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_k(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::cyl_bessel_k(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_bessel_k_nvrtc_double.cpp b/test/test_cyl_bessel_k_nvrtc_double.cpp
new file mode 100644
index 0000000000..66a8b14900
--- /dev/null
+++ b/test/test_cyl_bessel_k_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cmath>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_cyl_bessel_k_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_k(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_bessel_k_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cyl_bessel_k_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_bessel_k_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::cyl_bessel_k(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cyl_bessel_k_nvrtc_float.cpp b/test/test_cyl_bessel_k_nvrtc_float.cpp
new file mode 100644
index 0000000000..e23ff82c0d
--- /dev/null
+++ b/test/test_cyl_bessel_k_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cmath>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_cyl_bessel_k_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_k(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_bessel_k_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cyl_bessel_k_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_bessel_k_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::cyl_bessel_k(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cyl_hankel_1_double.cu b/test/test_cyl_hankel_1_double.cu
new file mode 100644
index 0000000000..1349469341
--- /dev/null
+++ b/test/test_cyl_hankel_1_double.cu
@@ -0,0 +1,119 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/hankel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, boost::math::complex<float_type> *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_hankel_1(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<boost::math::complex<float_type>> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<boost::math::complex<float_type>> results;
+    results.resize(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results[i] = boost::math::cyl_hankel_1(input_vector1[i], input_vector2[i]);
+    double t = w.elapsed();
+    // check the results
+    int failure_counter = 0;
+    for(int i = 0; i < numElements; ++i)
+    {
+        const auto eps = boost::math::epsilon_difference(output_vector[i].real(), results[i].real());
+        if (eps > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!\n"
+                      << "Device: " << output_vector[i].real() << ", " << output_vector[i].imag()
+                      << "\n  Host: " << results[i].real() << ", " << results[i].imag()
+                      << "\n   Eps: " << eps << std::endl;
+            ++failure_counter;
+            if (failure_counter > 100)
+            {
+                break;
+            }
+        }
+    }
+
+    if (failure_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_hankel_1_float.cu b/test/test_cyl_hankel_1_float.cu
new file mode 100644
index 0000000000..da78c375c6
--- /dev/null
+++ b/test/test_cyl_hankel_1_float.cu
@@ -0,0 +1,119 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/hankel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, boost::math::complex<float_type> *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_hankel_1(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<boost::math::complex<float_type>> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<boost::math::complex<float_type>> results;
+    results.resize(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results[i] = boost::math::cyl_hankel_1(input_vector1[i], input_vector2[i]);
+    double t = w.elapsed();
+    // check the results
+    int failure_counter = 0;
+    for(int i = 0; i < numElements; ++i)
+    {
+        const auto eps = boost::math::epsilon_difference(output_vector[i].real(), results[i].real());
+        if (eps > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!\n"
+                      << "Device: " << output_vector[i].real() << ", " << output_vector[i].imag()
+                      << "\n  Host: " << results[i].real() << ", " << results[i].imag()
+                      << "\n   Eps: " << eps << std::endl;
+            ++failure_counter;
+            if (failure_counter > 100)
+            {
+                break;
+            }
+        }
+    }
+
+    if (failure_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_hankel_1_nvrtc_double.cpp b/test/test_cyl_hankel_1_nvrtc_double.cpp
new file mode 100644
index 0000000000..298436d063
--- /dev/null
+++ b/test/test_cyl_hankel_1_nvrtc_double.cpp
@@ -0,0 +1,199 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cmath>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/tools/complex.hpp>
+#include <boost/math/special_functions/hankel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/hankel.hpp>
+extern "C" __global__
+void test_cyl_hankel_1_kernel(const float_type *in1, const float_type* in2, boost::math::complex<float_type> *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_hankel_1(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_hankel_1_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cyl_hankel_1_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_hankel_1_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2;
+        float_type *d_in1, *d_in2;
+        boost::math::complex<float_type> *h_out, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new boost::math::complex<float_type>[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(boost::math::complex<float_type>)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(boost::math::complex<float_type>), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        int fail_counter = 0;
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::cyl_hankel_1(h_in1[i], h_in2[i]);
+            if (boost::math::epsilon_difference(res.real(), h_out[i].real()) > 300)
+            {
+                std::cout << "error at line: " << i
+                          << "\nParallel: " << h_out[i].real() << ", " << h_out[i].imag()
+                          << "\n  Serial: " << res.real() << ", " << res.imag()
+                          << "\n    Dist: " << boost::math::epsilon_difference(res.real(), h_out[i].real()) << std::endl;
+                ++fail_counter;
+                if (fail_counter > 100)
+                {
+                    break;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        if (fail_counter > 0)
+        {
+            return 1;
+        }
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cyl_hankel_1_nvrtc_float.cpp b/test/test_cyl_hankel_1_nvrtc_float.cpp
new file mode 100644
index 0000000000..d505c7bc4c
--- /dev/null
+++ b/test/test_cyl_hankel_1_nvrtc_float.cpp
@@ -0,0 +1,199 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cmath>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/tools/complex.hpp>
+#include <boost/math/special_functions/hankel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/hankel.hpp>
+extern "C" __global__
+void test_cyl_hankel_1_kernel(const float_type *in1, const float_type* in2, boost::math::complex<float_type> *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_hankel_1(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_hankel_1_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cyl_hankel_1_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_hankel_1_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2;
+        float_type *d_in1, *d_in2;
+        boost::math::complex<float_type> *h_out, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new boost::math::complex<float_type>[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(boost::math::complex<float_type>)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(boost::math::complex<float_type>), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        int fail_counter = 0;
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::cyl_hankel_1(h_in1[i], h_in2[i]);
+            if (boost::math::epsilon_difference(res.real(), h_out[i].real()) > 300)
+            {
+                std::cout << "error at line: " << i
+                          << "\nParallel: " << h_out[i].real() << ", " << h_out[i].imag()
+                          << "\n  Serial: " << res.real() << ", " << res.imag()
+                          << "\n    Dist: " << boost::math::epsilon_difference(res.real(), h_out[i].real()) << std::endl;
+                ++fail_counter;
+                if (fail_counter > 100)
+                {
+                    break;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        if (fail_counter > 0)
+        {
+            return 1;
+        }
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cyl_hankel_2_double.cu b/test/test_cyl_hankel_2_double.cu
new file mode 100644
index 0000000000..55b643173a
--- /dev/null
+++ b/test/test_cyl_hankel_2_double.cu
@@ -0,0 +1,119 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/hankel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, boost::math::complex<float_type> *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_hankel_2(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<boost::math::complex<float_type>> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<boost::math::complex<float_type>> results;
+    results.resize(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results[i] = boost::math::cyl_hankel_2(input_vector1[i], input_vector2[i]);
+    double t = w.elapsed();
+    // check the results
+    int failure_counter = 0;
+    for(int i = 0; i < numElements; ++i)
+    {
+        const auto eps = boost::math::epsilon_difference(output_vector[i].real(), results[i].real());
+        if (eps > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!\n"
+                      << "Device: " << output_vector[i].real() << ", " << output_vector[i].imag()
+                      << "\n  Host: " << results[i].real() << ", " << results[i].imag()
+                      << "\n   Eps: " << eps << std::endl;
+            ++failure_counter;
+            if (failure_counter > 100)
+            {
+                break;
+            }
+        }
+    }
+
+    if (failure_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_hankel_2_float.cu b/test/test_cyl_hankel_2_float.cu
new file mode 100644
index 0000000000..5766ebeb48
--- /dev/null
+++ b/test/test_cyl_hankel_2_float.cu
@@ -0,0 +1,119 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/hankel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, boost::math::complex<float_type> *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_hankel_2(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<boost::math::complex<float_type>> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<boost::math::complex<float_type>> results;
+    results.resize(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results[i] = boost::math::cyl_hankel_2(input_vector1[i], input_vector2[i]);
+    double t = w.elapsed();
+    // check the results
+    int failure_counter = 0;
+    for(int i = 0; i < numElements; ++i)
+    {
+        const auto eps = boost::math::epsilon_difference(output_vector[i].real(), results[i].real());
+        if (eps > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!\n"
+                      << "Device: " << output_vector[i].real() << ", " << output_vector[i].imag()
+                      << "\n  Host: " << results[i].real() << ", " << results[i].imag()
+                      << "\n   Eps: " << eps << std::endl;
+            ++failure_counter;
+            if (failure_counter > 100)
+            {
+                break;
+            }
+        }
+    }
+
+    if (failure_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_hankel_2_nvrtc_double.cpp b/test/test_cyl_hankel_2_nvrtc_double.cpp
new file mode 100644
index 0000000000..f7589d2016
--- /dev/null
+++ b/test/test_cyl_hankel_2_nvrtc_double.cpp
@@ -0,0 +1,199 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_cyl_hankel_2_kernel(const float_type *in1, const float_type* in2, boost::math::complex *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::cyl_hankel_2(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_hankel_2_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cyl_hankel_2_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_hankel_2_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2; + float_type *d_in1, *d_in2; + boost::math::complex *h_out, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new boost::math::complex[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(boost::math::complex)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(boost::math::complex), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + int fail_counter = 0; + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::cyl_hankel_2(h_in1[i], h_in2[i]); + if (boost::math::epsilon_difference(res.real(), h_out[i].real()) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i].real() << ", " << h_out[i].imag() + << "\n Serial: " << res.real() << ", " << res.imag() + << "\n Dist: " << boost::math::epsilon_difference(res.real(), h_out[i].real()) << std::endl; + ++fail_counter; + if (fail_counter > 100) + { + break; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + if (fail_counter > 0) + { + return 1; + } + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_cyl_hankel_2_nvrtc_float.cpp b/test/test_cyl_hankel_2_nvrtc_float.cpp new file mode 100644 index 0000000000..54216d39c9 --- /dev/null +++ b/test/test_cyl_hankel_2_nvrtc_float.cpp @@ -0,0 +1,199 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
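// cuLaunchKernel, used by all of the NVRTC tests, takes its kernel parameters
// as an array of pointers *to* the arguments rather than the argument values
// themselves. A sketch of that calling convention factored into a helper,
// assuming a kernel of signature (const T*, const T*, T*, int); `launch` is a
// hypothetical name, not something these tests define:

#include <cuda.h>

CUresult launch(CUfunction kernel, CUdeviceptr in1, CUdeviceptr in2,
                CUdeviceptr out, int n, int block_size)
{
    void* args[] = { &in1, &in2, &out, &n };   // addresses of the arguments
    const int grid = (n + block_size - 1) / block_size;
    return cuLaunchKernel(kernel,
                          grid, 1, 1,          // grid dimensions
                          block_size, 1, 1,    // block dimensions
                          0, nullptr,          // shared memory bytes, stream
                          args, nullptr);      // kernel parameters, extra options
}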
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_cyl_hankel_2_kernel(const float_type *in1, const float_type* in2, boost::math::complex *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::cyl_hankel_2(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_hankel_2_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cyl_hankel_2_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_hankel_2_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2; + float_type *d_in1, *d_in2; + boost::math::complex *h_out, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new boost::math::complex[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(boost::math::complex)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(boost::math::complex), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + int fail_counter = 0; + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::cyl_hankel_2(h_in1[i], h_in2[i]); + if (boost::math::epsilon_difference(res.real(), h_out[i].real()) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i].real() << ", " << h_out[i].imag() + << "\n Serial: " << res.real() << ", " << res.imag() + << "\n Dist: " << boost::math::epsilon_difference(res.real(), h_out[i].real()) << std::endl; + ++fail_counter; + if (fail_counter > 100) + { + break; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + if (fail_counter > 0) + { + return 1; + } + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_cyl_neumann_double.cu b/test/test_cyl_neumann_double.cu new file mode 100644 index 0000000000..0e7a72ff98 --- /dev/null +++ b/test/test_cyl_neumann_double.cu @@ -0,0 +1,116 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
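// Each CUDA test in this diff ends with the same host-vs-device comparison
// loop, measured in units of machine epsilon via boost::math::epsilon_difference.
// A sketch of that check factored into one helper (hypothetical; the committed
// tests keep the loop inline in each file):

#include <cmath>
#include <iostream>
#include <boost/math/special_functions/relative_difference.hpp>

template <typename T>
int count_ulp_failures(const T* device, const T* host, int n, T max_eps)
{
    int failures = 0;
    for (int i = 0; i < n; ++i)
    {
        // Skip non-finite values, as the double-precision tests do.
        if (!std::isfinite(device[i]) || !std::isfinite(host[i]))
            continue;
        const T eps = boost::math::epsilon_difference(device[i], host[i]);
        if (eps > max_eps)
        {
            std::cerr << "Mismatch at element " << i << ": " << eps << " eps\n";
            ++failures;
        }
    }
    return failures;
}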
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_neumann(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::cyl_neumann(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    bool failed = false;
+    // check the results: output_vector holds the device (parallel) values,
+    // results holds the host (serial) reference values
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(output_vector[i]) && std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 5000)
+            {
+                std::cout << "error at line: " << i
+                          << "\nParallel: " << output_vector[i]
+                          << "\n  Serial: " << results[i]
+                          << "\n    Dist: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl;
+                failed = true;
+            }
+        }
+    }
+
+    if (failed)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_neumann_float.cu b/test/test_cyl_neumann_float.cu
new file mode 100644
index 0000000000..f621d2fc65
--- /dev/null
+++ b/test/test_cyl_neumann_float.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
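// The .cu tests allocate through cuda_managed_ptr<T> from cuda_managed_ptr.hpp,
// a header this diff does not show. A minimal sketch of the kind of unified-
// memory wrapper those tests assume, using cudaMallocManaged so the same
// pointer is valid on host and device (hypothetical; the real header may
// differ, e.g. in error handling or construction semantics):

#include <cstddef>
#include <cuda_runtime.h>

template <typename T>
class managed_array   // hypothetical stand-in for cuda_managed_ptr<T>
{
    T* ptr_ = nullptr;
public:
    explicit managed_array(std::size_t n)
    {
        // Unified memory: pages migrate between host and device on demand.
        cudaMallocManaged(&ptr_, n * sizeof(T));
    }
    ~managed_array() { cudaFree(ptr_); }
    managed_array(const managed_array&) = delete;
    managed_array& operator=(const managed_array&) = delete;
    T* get() const { return ptr_; }
    T& operator[](std::size_t i) const { return ptr_[i]; }
};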
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::cyl_neumann(in1[i], in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed input vector B + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = rand()/(float_type)RAND_MAX; + input_vector2[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::cyl_neumann(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cyl_neumann_nvrtc_double.cpp b/test/test_cyl_neumann_nvrtc_double.cpp new file mode 100644 index 0000000000..78bbd3b5ca --- /dev/null +++ b/test/test_cyl_neumann_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
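// Every NVRTC kernel string in these tests is declared extern "C", so
// cuModuleGetFunction can look the kernel up by its unmangled source name;
// the nvrtcAddNameExpression calls are only strictly required for mangled
// C++ names. A sketch of the name-expression route that would be needed
// without extern "C" (illustration; not what these tests do as written):

#include <nvrtc.h>

// Must be paired with nvrtcAddNameExpression(prog, name_expression) called
// before nvrtcCompileProgram; the returned pointer is owned by `prog`.
const char* lowered_name(nvrtcProgram prog, const char* name_expression)
{
    const char* lowered = nullptr;
    nvrtcGetLoweredName(prog, name_expression, &lowered);
    return lowered;   // pass to cuModuleGetFunction in place of the source name
}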
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_cyl_neumann_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::cyl_neumann(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_neumann_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cyl_neumann_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_neumann_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); 
+ h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::cyl_neumann(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_cyl_neumann_nvrtc_float.cpp b/test/test_cyl_neumann_nvrtc_float.cpp new file mode 100644 index 0000000000..78bbd3b5ca --- /dev/null +++ b/test/test_cyl_neumann_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
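// On compilation failure the tests fetch the NVRTC log into a raw new[]
// buffer and delete[] it by hand. A sketch of the same two calls wrapped so
// the buffer is released automatically (an alternative formulation, not the
// committed code):

#include <cstddef>
#include <string>
#include <nvrtc.h>

std::string nvrtc_log(nvrtcProgram prog)
{
    std::size_t size = 0;
    nvrtcGetProgramLogSize(prog, &size);
    std::string log(size, '\0');
    nvrtcGetProgramLog(prog, &log[0]);
    return log;
}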
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_cyl_neumann_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_neumann(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_neumann_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cyl_neumann_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_neumann_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::cyl_neumann(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_digamma_double.cu b/test/test_digamma_double.cu
new file mode 100644
index 0000000000..c88fe153c5
--- /dev/null
+++ b/test/test_digamma_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
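// checkCUDAError, checkCUError, and checkNVRTCError are repeated verbatim in
// every *_nvrtc_* test in this diff. A sketch of the same checks as one
// overload set that a shared test header could provide (a hypothetical
// refactoring; the diff as written keeps each test self-contained):

#include <cstdlib>
#include <iostream>
#include <cuda.h>
#include <cuda_runtime.h>
#include <nvrtc.h>

void check(cudaError_t r, const char* msg)
{
    if (r != cudaSuccess)
    {
        std::cerr << msg << ": " << cudaGetErrorString(r) << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

void check(CUresult r, const char* msg)
{
    if (r != CUDA_SUCCESS)
    {
        const char* s = nullptr;
        cuGetErrorString(r, &s);
        std::cerr << msg << ": " << (s ? s : "unknown CUresult") << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

void check(nvrtcResult r, const char* msg)
{
    if (r != NVRTC_SUCCESS)
    {
        std::cerr << msg << ": " << nvrtcGetErrorString(r) << std::endl;
        std::exit(EXIT_FAILURE);
    }
}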
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::digamma(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::digamma(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_digamma_float.cu b/test/test_digamma_float.cu new file mode 100644 index 0000000000..ea1b1c68e9 --- /dev/null +++ b/test/test_digamma_float.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
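// The `watch w;` timer in the .cu tests comes from stopwatch.hpp, which this
// diff does not include. A minimal std::chrono sketch with the interface the
// tests rely on, elapsed() in seconds and reset() (an assumption about the
// header, not its actual contents):

#include <chrono>

class watch_sketch
{
    std::chrono::steady_clock::time_point start_ = std::chrono::steady_clock::now();
public:
    double elapsed() const
    {
        return std::chrono::duration<double>(std::chrono::steady_clock::now() - start_).count();
    }
    void reset() { start_ = std::chrono::steady_clock::now(); }
};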
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::digamma(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::digamma(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_digamma_nvrtc_double.cpp b/test/test_digamma_nvrtc_double.cpp new file mode 100644 index 0000000000..d3da101881 --- /dev/null +++ b/test/test_digamma_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_digamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::digamma(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_digamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_digamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_digamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::digamma(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_digamma_nvrtc_float.cpp b/test/test_digamma_nvrtc_float.cpp new file mode 100644 index 0000000000..a698cbd56d --- /dev/null +++ b/test/test_digamma_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_digamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::digamma(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_digamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_digamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_digamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::digamma(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_digamma_simple.cpp b/test/test_digamma_simple.cpp new file mode 100644 index 0000000000..bbe003a015 --- /dev/null +++ b/test/test_digamma_simple.cpp @@ -0,0 +1,50 @@ +// (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <boost/math/special_functions/digamma.hpp>
+#include "math_unit_test.hpp"
+
+template <typename T>
+void test_spots(T, const char* t)
+{
+   std::cout << "Testing basic sanity checks for type " << t << std::endl;
+   //
+   // Basic sanity checks, tolerance is 3 epsilon:
+   //
+   T tolerance = 3;
+   //
+   // Special tolerance (200eps) for when we're very near the root,
+   // and T has more than 64 bits in its mantissa:
+   //
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(0.125)), static_cast<T>(-8.3884926632958548678027429230863430000514460424495L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(0.5)), static_cast<T>(-1.9635100260214234794409763329987555671931596046604L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(1)), static_cast<T>(-0.57721566490153286060651209008240243104215933593992L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(1.5)), static_cast<T>(0.036489973978576520559023667001244432806840395339566L), tolerance * 40);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(1.5) - static_cast<T>(1)/32), static_cast<T>(0.00686541147073577672813890866512415766586241385896200579891429L), tolerance * 200);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(2)), static_cast<T>(0.42278433509846713939348790991759756895784066406008L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(8)), static_cast<T>(2.0156414779556099965363450527747404261006978069172L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(12)), static_cast<T>(2.4426616799758120167383652547949424463027180089374L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(22)), static_cast<T>(3.0681430398611966699248760264450329818421699570581L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(50)), static_cast<T>(3.9019896734278921969539597028823666609284424880275L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(500)), static_cast<T>(6.2136077650889917423827750552855712637776544784569L), tolerance);
+   //
+   // negative values:
+   //
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(-0.125)), static_cast<T>(7.1959829284523046176757814502538535827603450463013L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(-10.125)), static_cast<T>(9.9480538258660761287008034071425343357982429855241L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(-10.875)), static_cast<T>(-5.1527360383841562620205965901515879492020193154231L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(-1.5)), static_cast<T>(0.70315664064524318722569033366791109947350706200623L), tolerance);
+}
+
+int main()
+{
+   test_spots(0.0F, "float");
+   test_spots(0.0, "double");
+
+   return boost::math::test::report_errors();
+}
+
+
diff --git a/test/test_ellint_1.cpp b/test/test_ellint_1.cpp
index b5cb2a359e..9366e4545a 100644
--- a/test/test_ellint_1.cpp
+++ b/test/test_ellint_1.cpp
@@ -6,7 +6,14 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
+#ifndef BOOST_MATH_OVERFLOW_ERROR_POLICY
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#endif
+
 #include "test_ellint_1.hpp"
 
 //
diff --git a/test/test_ellint_1.hpp b/test/test_ellint_1.hpp
index 635bcf2293..786841302c 100644
--- a/test/test_ellint_1.hpp
+++ b/test/test_ellint_1.hpp
@@ -9,11 +9,15 @@
 // Constants are too big for float case, but this doesn't matter for test.
#endif +#include +#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS #include +#endif #define BOOST_TEST_MAIN #include #include #include +#include #include #include "functor.hpp" @@ -139,11 +143,13 @@ void test_spots(T, const char* type_name) // // Test error handling: // + #ifndef BOOST_MATH_NO_EXCEPTIONS BOOST_CHECK_GE(boost::math::ellint_1(T(1)), boost::math::tools::max_value()); BOOST_CHECK_GE(boost::math::ellint_1(T(-1)), boost::math::tools::max_value()); BOOST_CHECK_THROW(boost::math::ellint_1(T(1.0001)), std::domain_error); BOOST_CHECK_THROW(boost::math::ellint_1(T(-1.0001)), std::domain_error); BOOST_CHECK_THROW(boost::math::ellint_1(T(2.2), T(0.5)), std::domain_error); BOOST_CHECK_THROW(boost::math::ellint_1(T(-2.2), T(0.5)), std::domain_error); + #endif } diff --git a/test/test_ellint_1_double.cu b/test/test_ellint_1_double.cu new file mode 100644 index 0000000000..eb9bfb162d --- /dev/null +++ b/test/test_ellint_1_double.cu @@ -0,0 +1,106 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ellint_1(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ellint_1(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_ellint_1_float.cu b/test/test_ellint_1_float.cu new file mode 100644 index 0000000000..8de959d225 --- /dev/null +++ b/test/test_ellint_1_float.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ellint_1(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ellint_1(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_ellint_1_nvrtc_double.cpp b/test/test_ellint_1_nvrtc_double.cpp new file mode 100644 index 0000000000..fac5da55f0 --- /dev/null +++ b/test/test_ellint_1_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
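// The CI compile options above pin --gpu-architecture=compute_75 (Turing-class
// PTX). A sketch of deriving that option from whatever device is actually
// present, via the driver API attribute queries (illustration only; the tests
// keep the flag fixed):

#include <cstdio>
#include <cstddef>
#include <cuda.h>

void arch_option(CUdevice dev, char* buf, std::size_t len)
{
    int major = 0, minor = 0;
    cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev);
    cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev);
    std::snprintf(buf, len, "--gpu-architecture=compute_%d%d", major, minor);
}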
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_ellint_1_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::ellint_1(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ellint_1_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_ellint_1_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ellint_1_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ellint_1(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ellint_1_nvrtc_float.cpp b/test/test_ellint_1_nvrtc_float.cpp
new file mode 100644
index 0000000000..fac5da55f0
--- /dev/null
+++ b/test/test_ellint_1_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/ellint_1.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/ellint_1.hpp>
+extern "C" __global__
+void test_ellint_1_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_1(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ellint_1_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ellint_1_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ellint_1_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ellint_1(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ellint_2.cpp b/test/test_ellint_2.cpp
index ca3e994d4d..0da012c133 100644
--- a/test/test_ellint_2.cpp
+++ b/test/test_ellint_2.cpp
@@ -6,7 +6,10 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
 #include "test_ellint_2.hpp"
 
 //
@@ -72,7 +75,11 @@ void expected_results()
       ".*",                          // platform
       largest_type,                  // test type(s)
       ".*",                          // test data group
+      #ifdef SYCL_LANGUAGE_VERSION
+      ".*", 20, 6);                  // test function
+      #else
       ".*", 15, 6);                  // test function
+      #endif
    add_expected_result(
       ".*",                          // compiler
       ".*",                          // stdlib
diff --git a/test/test_ellint_2.hpp b/test/test_ellint_2.hpp
index e38f94d984..29a73c9961 100644
--- a/test/test_ellint_2.hpp
+++ b/test/test_ellint_2.hpp
@@ -9,11 +9,18 @@
 // Constants are too big for float case, but this doesn't matter for test.
 #endif
 
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp>
+#endif
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/math/special_functions/ellint_2.hpp>
+#include
+#include
 #include <boost/array.hpp>
 #include "functor.hpp"
@@ -157,10 +164,12 @@ void test_spots(T, const char* type_name)
    //
    // Test error handling:
    //
+   #ifndef BOOST_MATH_NO_EXCEPTIONS
    BOOST_CHECK_EQUAL(boost::math::ellint_2(T(1)), T(1));
    BOOST_CHECK_EQUAL(boost::math::ellint_2(T(-1)), T(1));
    BOOST_CHECK_THROW(boost::math::ellint_2(T(1.5)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::ellint_2(T(-1.5)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::ellint_2(T(1.5), T(1.5)), std::domain_error);
+   #endif
 }
diff --git a/test/test_ellint_2_double.cu b/test/test_ellint_2_double.cu
new file mode 100644
index 0000000000..2e1073576e
--- /dev/null
+++ b/test/test_ellint_2_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/ellint_2.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_2(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::ellint_2(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
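+        // The comparison below is in multiples of machine epsilon; 10 eps is assumed
+        // to be a tight enough bound when host and device compute at the same precision.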
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_ellint_2_float.cu b/test/test_ellint_2_float.cu
new file mode 100644
index 0000000000..a55a6d1ad4
--- /dev/null
+++ b/test/test_ellint_2_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/ellint_2.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_2(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::ellint_2(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_ellint_2_nvrtc_double.cpp b/test/test_ellint_2_nvrtc_double.cpp
new file mode 100644
index 0000000000..dd2eef1547
--- /dev/null
+++ b/test/test_ellint_2_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/ellint_2.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/ellint_2.hpp>
+extern "C" __global__
+void test_ellint_2_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_2(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ellint_2_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ellint_2_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ellint_2_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ellint_2(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ellint_2_nvrtc_float.cpp b/test/test_ellint_2_nvrtc_float.cpp
new file mode 100644
index 0000000000..dd2eef1547
--- /dev/null
+++ b/test/test_ellint_2_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/ellint_2.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/ellint_2.hpp>
+extern "C" __global__
+void test_ellint_2_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_2(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ellint_2_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ellint_2_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ellint_2_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ellint_2(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ellint_3_double.cu b/test/test_ellint_3_double.cu
new file mode 100644
index 0000000000..979e01ff18
--- /dev/null
+++ b/test/test_ellint_3_double.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/ellint_3.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_3(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::ellint_3(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_ellint_3_float.cu b/test/test_ellint_3_float.cu
new file mode 100644
index 0000000000..979e01ff18
--- /dev/null
+++ b/test/test_ellint_3_float.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/ellint_3.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_3(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::ellint_3(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_ellint_3_nvrtc_double.cpp b/test/test_ellint_3_nvrtc_double.cpp
new file mode 100644
index 0000000000..dacab66192
--- /dev/null
+++ b/test/test_ellint_3_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/ellint_3.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/ellint_3.hpp>
+extern "C" __global__
+void test_ellint_3_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_3(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ellint_3_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ellint_3_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ellint_3_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ellint_3(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ellint_3_nvrtc_float.cpp b/test/test_ellint_3_nvrtc_float.cpp
new file mode 100644
index 0000000000..72b2ec71e7
--- /dev/null
+++ b/test/test_ellint_3_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/ellint_3.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/ellint_3.hpp>
+extern "C" __global__
+void test_ellint_3_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_3(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ellint_3_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ellint_3_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ellint_3_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ellint_3(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ellint_d.cpp b/test/test_ellint_d.cpp
index 5e76a49fb6..420bc0c022 100644
--- a/test/test_ellint_d.cpp
+++ b/test/test_ellint_d.cpp
@@ -4,7 +4,10 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
 #include "test_ellint_d.hpp"
 
 //
diff --git a/test/test_ellint_d.hpp b/test/test_ellint_d.hpp
index de53936f1f..c33a4d942a 100644
--- a/test/test_ellint_d.hpp
+++ b/test/test_ellint_d.hpp
@@ -8,11 +8,17 @@
 // Constants are too big for float case, but this doesn't matter for test.
 #endif
 
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp>
+#endif
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/math/special_functions/ellint_d.hpp>
+#include
 #include <boost/array.hpp>
 #include "functor.hpp"
@@ -117,6 +123,7 @@ void test_spots(T, const char* type_name)
    do_test_ellint_d1<T>(ellint_d_data, type_name, "Elliptic Integral D: Random Data");
 
+   #ifndef BOOST_MATH_NO_EXCEPTIONS
    BOOST_MATH_CHECK_THROW(boost::math::ellint_d(T(1)), std::domain_error);
    BOOST_MATH_CHECK_THROW(boost::math::ellint_d(T(-1)), std::domain_error);
    BOOST_MATH_CHECK_THROW(boost::math::ellint_d(T(1.5)), std::domain_error);
@@ -126,5 +133,6 @@ void test_spots(T, const char* type_name)
       BOOST_CHECK_EQUAL(boost::math::ellint_d(T(0.5), std::numeric_limits<T>::infinity()), std::numeric_limits<T>::infinity());
    }
    BOOST_MATH_CHECK_THROW(boost::math::ellint_d(T(1.5), T(1.0)), std::domain_error);
+   #endif
 }
diff --git a/test/test_ellint_d_double.cu b/test/test_ellint_d_double.cu
new file mode 100644
index 0000000000..979e01ff18
--- /dev/null
+++ b/test/test_ellint_d_double.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/ellint_d.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_d(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
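+    // The launch above is asynchronous; after cudaDeviceSynchronize() any launch
+    // failure should be reported by cudaGetLastError() below.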
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::ellint_d(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_ellint_d_float.cu b/test/test_ellint_d_float.cu
new file mode 100644
index 0000000000..50882aa76a
--- /dev/null
+++ b/test/test_ellint_d_float.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/ellint_d.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_d(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::ellint_d(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_ellint_d_nvrtc_double.cpp b/test/test_ellint_d_nvrtc_double.cpp
new file mode 100644
index 0000000000..cb65a2e731
--- /dev/null
+++ b/test/test_ellint_d_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/ellint_d.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/ellint_d.hpp>
+extern "C" __global__
+void test_ellint_d_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_d(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ellint_d_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ellint_d_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
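+            // NVRTC's reported log size is assumed to include the terminating NUL,
+            // so the buffer allocated below holds the complete log.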
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ellint_d_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ellint_d(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ellint_d_nvrtc_float.cpp b/test/test_ellint_d_nvrtc_float.cpp
new file mode 100644
index 0000000000..727d9dcd17
--- /dev/null
+++ b/test/test_ellint_d_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/special_functions/ellint_d.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/ellint_d.hpp>
+#include <boost/math/tools/config.hpp>
+extern "C" __global__
+void test_ellint_d_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_d(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ellint_d_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ellint_d_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ellint_d_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
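+        // Allocate the device-side buffers, checking every CUDA call so a
+        // failure points at the first bad allocation or copy.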
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ellint_d(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_erf.cpp b/test/test_erf.cpp
index 5359039834..2232c1c759 100644
--- a/test/test_erf.cpp
+++ b/test/test_erf.cpp
@@ -4,7 +4,20 @@
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
+#ifdef __clang__
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Woverflow"
+#endif
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#include <boost/math/special_functions/erf.hpp>
 #include "test_erf.hpp"
 
 //
diff --git a/test/test_erf.hpp b/test/test_erf.hpp
index dc42c81248..b70c739530 100644
--- a/test/test_erf.hpp
+++ b/test/test_erf.hpp
@@ -1,9 +1,11 @@
-// Copyright John Maddock 2006.
-// Copyright Paul A. Bristow 2007, 2009
+// Copyright John Maddock 2006.
+// Copyright Paul A. Bristow 2007, 2009
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#include <boost/math/tools/config.hpp>
 #include <boost/math/concepts/real_concept.hpp>
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
@@ -21,6 +23,11 @@
 #define SC_(x) static_cast<typename table_type<T>::type>(BOOST_JOIN(x, L))
 #endif
 
+#ifdef BOOST_MATH_NO_EXCEPTIONS
+# undef BOOST_CHECK_THROW
+# define BOOST_CHECK_THROW(x, y)
+#endif
+
 template <class Real, class T>
 void do_test_erf(const T& data, const char* type_name, const char* test_name)
 {
diff --git a/test/test_erf_double.cu b/test/test_erf_double.cu
new file mode 100644
index 0000000000..3e8398262a
--- /dev/null
+++ b/test/test_erf_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/erf.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::erf(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::erf(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_erf_float.cu b/test/test_erf_float.cu
new file mode 100644
index 0000000000..6cbd07e6ae
--- /dev/null
+++ b/test/test_erf_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/erf.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::erf(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::erf(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_erf_inv_double.cu b/test/test_erf_inv_double.cu
new file mode 100644
index 0000000000..f540babbb2
--- /dev/null
+++ b/test/test_erf_inv_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/erf.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::erf_inv(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::erf_inv(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_erf_inv_float.cu b/test/test_erf_inv_float.cu
new file mode 100644
index 0000000000..d9f37687f1
--- /dev/null
+++ b/test/test_erf_inv_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/erf.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::erf_inv(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::erf_inv(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_erf_inv_nvrtc_double.cpp b/test/test_erf_inv_nvrtc_double.cpp
new file mode 100644
index 0000000000..5588b76689
--- /dev/null
+++ b/test/test_erf_inv_nvrtc_double.cpp
@@ -0,0 +1,186 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <boost/math/special_functions/erf.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/erf.hpp>
+extern "C" __global__
+void test_erf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::erf_inv(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_erf_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_erf_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_erf_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
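+        // erf_inv is only defined on (-1, 1), so inputs drawn from [0, 1)
+        // keep every device call in-domain.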
for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::erf_inv(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_erf_inv_nvrtc_float.cpp b/test/test_erf_inv_nvrtc_float.cpp new file mode 100644 index 0000000000..ff7f6db98b --- /dev/null +++ b/test/test_erf_inv_nvrtc_float.cpp @@ -0,0 +1,186 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <boost/math/special_functions/erf.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/erf.hpp>
+extern "C" __global__
+void test_erf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::erf_inv(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_erf_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_erf_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_erf_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::erf_inv(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_erf_nvrtc_double.cpp b/test/test_erf_nvrtc_double.cpp new file mode 100644 index 0000000000..e20d0188d0 --- /dev/null +++ b/test/test_erf_nvrtc_double.cpp @@ -0,0 +1,186 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <boost/math/special_functions/erf.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/erf.hpp>
+extern "C" __global__
+void test_erf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::erf(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_erf_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_erf_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_erf_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
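+        // h_in2/d_in2 are never read by erf itself; the second input keeps
+        // the kernel signature and cuLaunchKernel argument list identical
+        // across these NVRTC tests.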
d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::erf(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_erf_nvrtc_float.cpp b/test/test_erf_nvrtc_float.cpp new file mode 100644 index 0000000000..913b1a14cc --- /dev/null +++ b/test/test_erf_nvrtc_float.cpp @@ -0,0 +1,186 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <boost/math/special_functions/erf.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <random>
+
+typedef float float_type;
+
+// Note: the kernel must use the same float_type as the host buffers,
+// otherwise the double results would be written into float storage.
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/erf.hpp>
+extern "C" __global__
+void test_erf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::erf(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_erf_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_erf_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_erf_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::erf(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_erfc_double.cu b/test/test_erfc_double.cu new file mode 100644 index 0000000000..86d3c6e5b4 --- /dev/null +++ b/test/test_erfc_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/erf.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::erfc(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::erfc(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_erfc_float.cu b/test/test_erfc_float.cu
new file mode 100644
index 0000000000..7970063a47
--- /dev/null
+++ b/test/test_erfc_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/erf.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::erfc(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::erfc(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_erfc_inv_double.cu b/test/test_erfc_inv_double.cu
new file mode 100644
index 0000000000..68642cd109
--- /dev/null
+++ b/test/test_erfc_inv_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/erf.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::erfc_inv(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::erfc_inv(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_erfc_inv_float.cu b/test/test_erfc_inv_float.cu
new file mode 100644
index 0000000000..b5b72cd057
--- /dev/null
+++ b/test/test_erfc_inv_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/erf.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::erfc_inv(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::erfc_inv(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_erfc_inv_nvrtc_double.cpp b/test/test_erfc_inv_nvrtc_double.cpp
new file mode 100644
index 0000000000..ae961d657e
--- /dev/null
+++ b/test/test_erfc_inv_nvrtc_double.cpp
@@ -0,0 +1,186 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <boost/math/special_functions/erf.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/erf.hpp>
+extern "C" __global__
+void test_erf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::erfc_inv(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_erf_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_erf_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_erf_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
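+        // erfc_inv takes arguments in [0, 2]; an argument of exactly 0
+        // overflows, which the ignore_error policy above maps to a
+        // non-finite value that the verification loop then skips.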
memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::erfc_inv(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_erfc_inv_nvrtc_float.cpp b/test/test_erfc_inv_nvrtc_float.cpp new file mode 100644 index 0000000000..b676330ceb --- /dev/null +++ b/test/test_erfc_inv_nvrtc_float.cpp @@ -0,0 +1,186 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +extern "C" __global__ +void test_erf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::erfc_inv(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_erf_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_erf_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_erf_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory 
for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::erfc_inv(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_erfc_nvrtc_double.cpp b/test/test_erfc_nvrtc_double.cpp new file mode 100644 index 0000000000..c43a469acf --- /dev/null +++ b/test/test_erfc_nvrtc_double.cpp @@ -0,0 +1,186 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +extern "C" __global__ +void test_erf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::erfc(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_erf_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_erf_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_erf_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory 
for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::erfc(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_erfc_nvrtc_float.cpp b/test/test_erfc_nvrtc_float.cpp new file mode 100644 index 0000000000..f8756045a8 --- /dev/null +++ b/test/test_erfc_nvrtc_float.cpp @@ -0,0 +1,186 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +extern "C" __global__ +void test_erf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::erfc(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_erf_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_erf_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_erf_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for 
d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::erfc(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_exp_sinh_quad_double.cu b/test/test_exp_sinh_quad_double.cu new file mode 100644 index 0000000000..59f6d8a12f --- /dev/null +++ b/test/test_exp_sinh_quad_double.cu @@ -0,0 +1,133 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <limits>
+#include <boost/math/quadrature/exp_sinh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+__host__ __device__ float_type func(float_type x)
+{
+    BOOST_MATH_STD_USING
+    return 1/(1+x*x);
+}
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    float_type tol = boost::math::tools::root_epsilon<float_type>();
+    float_type error;
+    float_type L1;
+    boost::math::size_t levels;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = M_PI * (static_cast<float_type>(i) / numElements);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 512;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    float_type tol = boost::math::tools::root_epsilon<float_type>();
+    float_type error;
+    float_type L1;
+    boost::math::quadrature::exp_sinh<float_type> integrator;
+    for(int i = 0; i < numElements; ++i)
+    {
+        results.push_back(integrator.integrate(func, tol, &error, &L1));
+    }
+    double t = w.elapsed();
+    // check the results
+    int failed_count = 0;
+    for(int i = 0; i < numElements; ++i)
+    {
+        const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
+        if (eps > 10)
+        {
+            std::cerr << std::setprecision(std::numeric_limits<float_type>::digits10)
+                      << "Result verification failed at element " << i << "!\n"
+                      << "Device: " << output_vector[i]
+                      << "\n  Host: " << results[i]
+                      << "\n   Eps: " << eps << "\n";
+            failed_count++;
+        }
+        if (failed_count > 100)
+        {
+            break;
+        }
+    }
+
+    if (failed_count != 0)
+    {
+        std::cout << "Test FAILED" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_exp_sinh_quad_float.cu b/test/test_exp_sinh_quad_float.cu
new file mode 100644
index 0000000000..1472dbcde8
--- /dev/null
+++ b/test/test_exp_sinh_quad_float.cu
@@ -0,0 +1,133 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
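Both of these quadrature tests, like the other .cu tests in this patch, lean on two support headers that are not part of the diff: cuda_managed_ptr.hpp and stopwatch.hpp. From the usage above (a count constructor, get(), and operator[] touched from host code before and after the kernel runs), cuda_managed_ptr is evidently a small RAII wrapper over CUDA unified memory. A plausible minimal reconstruction, offered only as a reading aid and assuming cudaMallocManaged-backed storage:

    #include <cuda_runtime.h>
    #include <cstddef>

    // Hypothetical sketch of cuda_managed_ptr.hpp; the real header is not shown in this diff.
    template <typename T>
    class cuda_managed_ptr
    {
        T* ptr_ = nullptr;
    public:
        explicit cuda_managed_ptr(std::size_t n)
        {
            // Unified memory is addressable from both host and device,
            // which is why main() can fill input_vector before the launch
            // and read output_vector after cudaDeviceSynchronize().
            cudaMallocManaged(&ptr_, n * sizeof(T));
        }
        ~cuda_managed_ptr() { cudaFree(ptr_); }
        cuda_managed_ptr(const cuda_managed_ptr&) = delete;
        cuda_managed_ptr& operator=(const cuda_managed_ptr&) = delete;
        T* get() const { return ptr_; }
        T& operator[](std::size_t i) { return ptr_[i]; }
        const T& operator[](std::size_t i) const { return ptr_[i]; }
    };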
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <limits>
+#include <boost/math/quadrature/exp_sinh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+__host__ __device__ float_type func(float_type x)
+{
+    BOOST_MATH_STD_USING
+    return 1/(1+x*x);
+}
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    float_type tol = boost::math::tools::root_epsilon<float_type>();
+    float_type error;
+    float_type L1;
+    boost::math::size_t levels;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = M_PI * (static_cast<float_type>(i) / numElements);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 512;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!"
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::quadrature::exp_sinh integrator; + for(int i = 0; i < numElements; ++i) + { + results.push_back(integrator.integrate(func, tol, &error, &L1)); + } + double t = w.elapsed(); + // check the results + int failed_count = 0; + for(int i = 0; i < numElements; ++i) + { + const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]); + if (eps > 10) + { + std::cerr << std::setprecision(std::numeric_limits::digits10) + << "Result verification failed at element " << i << "!\n" + << "Device: " << output_vector[i] + << "\n Host: " << results[i] + << "\n Eps: " << eps << "\n"; + failed_count++; + } + if (failed_count > 100) + { + break; + } + } + + if (failed_count != 0) + { + std::cout << "Test FAILED" << std::endl; + return EXIT_FAILURE; + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_exp_sinh_quad_nvrtc_double.cpp b/test/test_exp_sinh_quad_nvrtc_double.cpp new file mode 100644 index 0000000000..bfd5080928 --- /dev/null +++ b/test/test_exp_sinh_quad_nvrtc_double.cpp @@ -0,0 +1,206 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include + +__host__ __device__ float_type func(float_type x) +{ + return 1/(1+x*x); +} + +extern "C" __global__ +void test_expm1_kernel(const float_type*, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::size_t levels; + + if (i < numElements) + { + out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels); + } +} +)"; + +__host__ __device__ float_type func(float_type x) +{ + return 1/(1+x*x); +} + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = 
nvrtcCreateProgram(&prog, cuda_kernel, "test_expm1_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_expm1_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_expm1_kernel"), "Failed to get kernel function"); + + int numElements = 50000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::quadrature::exp_sinh integrator; + for (int i = 0; i < numElements; ++i) + { + auto res = integrator.integrate(func, tol, &error, &L1); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] 
h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_exp_sinh_quad_nvrtc_float.cpp b/test/test_exp_sinh_quad_nvrtc_float.cpp new file mode 100644 index 0000000000..b472e5597c --- /dev/null +++ b/test/test_exp_sinh_quad_nvrtc_float.cpp @@ -0,0 +1,206 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include + +__host__ __device__ float_type func(float_type x) +{ + return 1/(1+x*x); +} + +extern "C" __global__ +void test_expm1_kernel(const float_type*, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::size_t levels; + + if (i < numElements) + { + out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels); + } +} +)"; + +__host__ __device__ float_type func(float_type x) +{ + return 1/(1+x*x); +} + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_expm1_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_expm1_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation 
failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_expm1_kernel"), "Failed to get kernel function"); + + int numElements = 50000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::quadrature::exp_sinh integrator; + for (int i = 0; i < numElements; ++i) + { + auto res = integrator.integrate(func, tol, &error, &L1); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_expint.cpp b/test/test_expint.cpp index 3f44a80915..3eede5e389 100644 --- a/test/test_expint.cpp +++ b/test/test_expint.cpp @@ -3,7 +3,14 @@ // Boost Software License, Version 1.0. 
(See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
+#ifndef BOOST_MATH_OVERFLOW_ERROR_POLICY
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#endif
+
 #include "test_expint.hpp"
 
 //
@@ -78,7 +85,11 @@ void expected_results()
       ".*",                          // platform
       "float|double|long double",    // test type(s)
       ".*Ei.*",                      // test data group
+      #ifndef SYCL_LANGUAGE_VERSION
       ".*", 6, 3);                   // test function
+      #else
+      ".*", 10, 3);
+      #endif
    if(std::numeric_limits<long double>::digits > 100)
    {
       add_expected_result(
diff --git a/test/test_expint.hpp b/test/test_expint.hpp
index 491db2fcdc..d6524810e4 100644
--- a/test/test_expint.hpp
+++ b/test/test_expint.hpp
@@ -4,13 +4,19 @@
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp>
+#endif
+
 #include
+#include
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
 #include
 #include
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include
 #include
 #include
diff --git a/test/test_expint_double.cu b/test/test_expint_double.cu
new file mode 100644
index 0000000000..d82e90a937
--- /dev/null
+++ b/test/test_expint_double.cu
@@ -0,0 +1,106 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <random>
+#include <boost/math/special_functions/expint.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::expint(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    std::mt19937_64 rng(42);
+    std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = dist(rng);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!"
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::expint(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_expint_float.cu b/test/test_expint_float.cu new file mode 100644 index 0000000000..dd1fccd1d7 --- /dev/null +++ b/test/test_expint_float.cu @@ -0,0 +1,106 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::expint(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::expint(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_expint_nvrtc_double.cpp b/test/test_expint_nvrtc_double.cpp new file mode 100644 index 0000000000..3ab45e6a1a --- /dev/null +++ b/test/test_expint_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_expint_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::expint(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_expint_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_expint_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, 
module, "test_expint_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::expint(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_expint_nvrtc_float.cpp b/test/test_expint_nvrtc_float.cpp new file mode 100644 index 0000000000..bff58580eb --- /dev/null +++ b/test/test_expint_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_expint_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::expint(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_expint_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_expint_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_expint_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } 
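// Note (editorial sketch, not part of the patch): the cudaMalloc/cudaMemcpy
// pairs that follow are repeated verbatim in every NVRTC test in this diff.
// A small helper built only from the runtime API calls already used here
// could fuse the two steps, e.g.:
//
//     template <typename T>
//     T* make_device_copy(const T* host, int n)
//     {
//         T* dev = nullptr;
//         checkCUDAError(cudaMalloc(&dev, n * sizeof(T)), "cudaMalloc failed");
//         checkCUDAError(cudaMemcpy(dev, host, n * sizeof(T), cudaMemcpyHostToDevice), "cudaMemcpy failed");
//         return dev;
//     }
//
// Also note that the second host array (h_in2) is staged to the device even
// though the kernel's second parameter is unnamed and never read.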
+ + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::expint(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_expm1_double.cu b/test/test_expm1_double.cu new file mode 100644 index 0000000000..cfed7d840d --- /dev/null +++ b/test/test_expm1_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/expm1.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::expm1(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::expm1(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_expm1_float.cu b/test/test_expm1_float.cu
new file mode 100644
index 0000000000..3d439b8872
--- /dev/null
+++ b/test/test_expm1_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
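A side note on input generation: the two expm1 tests seed their inputs with rand()/RAND_MAX, while every other test in this diff uses a seeded std::mt19937_64 with std::uniform_real_distribution. If the expm1 tests were brought in line with the rest of the suite, the init loop would look like the following sketch (same engine and seed as the NVRTC tests above; an illustration, not part of the patch):

    #include <random>

    // Deterministic inputs in [0, 1), reproducible across runs.
    std::mt19937_64 rng(42);
    std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
    for (int i = 0; i < numElements; ++i)
    {
        input_vector[i] = dist(rng);
    }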
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/expm1.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::expm1(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::expm1(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_expm1_nvrtc_double.cpp b/test/test_expm1_nvrtc_double.cpp
new file mode 100644
index 0000000000..ea496b73a7
--- /dev/null
+++ b/test/test_expm1_nvrtc_double.cpp
@@ -0,0 +1,186 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <iostream>
+#include <cmath>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <boost/math/special_functions/expm1.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/expm1.hpp>
+extern "C" __global__
+void test_expm1_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::expm1(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_expm1_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_expm1_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_expm1_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::expm1(h_in1[i]);
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_expm1_nvrtc_float.cpp b/test/test_expm1_nvrtc_float.cpp
new file mode 100644
index 0000000000..16d8636db5
--- /dev/null
+++ b/test/test_expm1_nvrtc_float.cpp
@@ -0,0 +1,186 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <iostream>
+#include <cmath>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <boost/math/special_functions/expm1.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/expm1.hpp>
+extern "C" __global__
+void test_expm1_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::expm1(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_expm1_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_expm1_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_expm1_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::expm1(h_in1[i]);
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_expm1_simple.cpp b/test/test_expm1_simple.cpp
new file mode 100644
index 0000000000..00513ea409
--- /dev/null
+++ b/test/test_expm1_simple.cpp
@@ -0,0 +1,32 @@
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/expm1.hpp>
+#include <cmath>
+#include <random>
+#include "math_unit_test.hpp"
+
+constexpr int N = 50000;
+
+template <typename T>
+void test()
+{
+    std::mt19937_64 rng(42);
+    std::uniform_real_distribution<T> dist(0, 0.01);
+
+    for (int n = 0; n < N; ++n)
+    {
+        const T value (dist(rng));
+        CHECK_ULP_CLOSE(std::expm1(value), boost::math::expm1(value), 10);
+    }
+}
+
+int main()
+{
+    test<float>();
+    test<double>();
+
+    return boost::math::test::report_errors();
+}
diff --git a/test/test_exponential_cdf_double.cu b/test/test_exponential_cdf_double.cu
new file mode 100644
index 0000000000..e3a57e86ec
--- /dev/null
+++ b/test/test_exponential_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::exponential_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::exponential_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_exponential_cdf_float.cu b/test/test_exponential_cdf_float.cu
new file mode 100644
index 0000000000..ed214a4953
--- /dev/null
+++ b/test/test_exponential_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::exponential_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::exponential_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_exponential_cdf_nvrtc_double.cpp b/test/test_exponential_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..42849f8abe
--- /dev/null
+++ b/test/test_exponential_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_exponential_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::exponential_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_exponential_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_exponential_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_exponential_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::exponential_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_exponential_cdf_nvrtc_float.cpp b/test/test_exponential_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..9417abf026
--- /dev/null
+++ b/test/test_exponential_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_exponential_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::exponential_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_exponential_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_exponential_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_exponential_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::exponential_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_exponential_dist.cpp b/test/test_exponential_dist.cpp
index d1898fa7c4..1b21df1411 100644
--- a/test/test_exponential_dist.cpp
+++ b/test/test_exponential_dist.cpp
@@ -8,7 +8,7 @@
 // test_exponential_dist.cpp
 
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
 #include <boost/math/distributions/exponential.hpp>
 using boost::math::exponential_distribution;
diff --git a/test/test_exponential_pdf_double.cu b/test/test_exponential_pdf_double.cu
new file mode 100644
index 0000000000..530b1023b3
--- /dev/null
+++ b/test/test_exponential_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::exponential_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::exponential_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_exponential_pdf_float.cu b/test/test_exponential_pdf_float.cu
new file mode 100644
index 0000000000..0801e2d0be
--- /dev/null
+++ b/test/test_exponential_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::exponential_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::exponential_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_exponential_pdf_nvrtc_double.cpp b/test/test_exponential_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..2c52a4b38e
--- /dev/null
+++ b/test/test_exponential_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_exponential_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::exponential_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_exponential_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_exponential_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_exponential_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::exponential_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_exponential_pdf_nvrtc_float.cpp b/test/test_exponential_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..ef1aff3f3c
--- /dev/null
+++ b/test/test_exponential_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_exponential_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::exponential_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_exponential_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_exponential_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_exponential_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::exponential_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_exponential_quan_double.cu b/test/test_exponential_quan_double.cu
new file mode 100644
index 0000000000..f4eb4c3b18
--- /dev/null
+++ b/test/test_exponential_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::exponential_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::exponential_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_exponential_quan_float.cu b/test/test_exponential_quan_float.cu
new file mode 100644
index 0000000000..f4eb4c3b18
--- /dev/null
+++ b/test/test_exponential_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::exponential_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::exponential_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_exponential_quan_nvrtc_double.cpp b/test/test_exponential_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..b05f77ffef
--- /dev/null
+++ b/test/test_exponential_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_exponential_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::exponential_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_exponential_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_exponential_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_exponential_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::exponential_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_exponential_quan_nvrtc_float.cpp b/test/test_exponential_quan_nvrtc_float.cpp new file mode 100644 index 0000000000..fb932c5d7c --- /dev/null +++ b/test/test_exponential_quan_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_exponential_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::exponential_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_exponential_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_exponential_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_exponential_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::exponential_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_exponential_range_support_double.cu b/test/test_exponential_range_support_double.cu new file mode 100644 index 0000000000..c19497ed50 --- /dev/null +++ b/test/test_exponential_range_support_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type* in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = support(boost::math::exponential_distribution(in1[i])).second; + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(support(boost::math::exponential_distribution(input_vector1[i])).second); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_exponential_range_support_float.cu b/test/test_exponential_range_support_float.cu new file mode 100644 index 0000000000..a111090de5 --- /dev/null +++ b/test/test_exponential_range_support_float.cu @@ -0,0 +1,111 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type* in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = range(boost::math::exponential_distribution(in1[i])).first; + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(range(boost::math::exponential_distribution(input_vector1[i])).first); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + std::cerr << "Device got: " << output_vector[i] << ", and serial got: " << results[i] << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_extreme_value.cpp b/test/test_extreme_value.cpp index fd8d928630..cb758e8f5b 100644 --- a/test/test_extreme_value.cpp +++ b/test/test_extreme_value.cpp @@ -1,5 +1,5 @@ // Copyright John Maddock 2006. - +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
// (See accompanying file LICENSE_1_0.txt @@ -7,7 +7,7 @@ // test_extreme_value.cpp -#include +#include "../include_private/boost/math/tools/test.hpp" #include // for real_concept #include using boost::math::extreme_value_distribution; diff --git a/test/test_extreme_value_cdf_double.cu b/test/test_extreme_value_cdf_double.cu new file mode 100644 index 0000000000..7ca0003482 --- /dev/null +++ b/test/test_extreme_value_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::extreme_value_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::extreme_value_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_extreme_value_cdf_float.cu b/test/test_extreme_value_cdf_float.cu new file mode 100644 index 0000000000..bc3ead6ebb --- /dev/null +++ b/test/test_extreme_value_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::extreme_value_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::extreme_value_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_extreme_value_cdf_nvrtc_double.cpp b/test/test_extreme_value_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..b3600b21bf --- /dev/null +++ b/test/test_extreme_value_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_extreme_value_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::extreme_value_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_extreme_value_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_extreme_value_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + 
size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_extreme_value_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::extreme_value_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_extreme_value_cdf_nvrtc_float.cpp b/test/test_extreme_value_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..b3600b21bf --- /dev/null +++ b/test/test_extreme_value_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_extreme_value_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::extreme_value_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_extreme_value_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_extreme_value_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_extreme_value_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] 
= static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::extreme_value_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_extreme_value_pdf_double.cu b/test/test_extreme_value_pdf_double.cu new file mode 100644 index 0000000000..44ccc5b716 --- /dev/null +++ b/test/test_extreme_value_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::extreme_value_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::extreme_value_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_extreme_value_pdf_float.cu b/test/test_extreme_value_pdf_float.cu new file mode 100644 index 0000000000..390622f400 --- /dev/null +++ b/test/test_extreme_value_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::extreme_value_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::extreme_value_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_extreme_value_pdf_nvrtc_double.cpp b/test/test_extreme_value_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..049f45d890 --- /dev/null +++ b/test/test_extreme_value_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_extreme_value_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::extreme_value_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_extreme_value_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_extreme_value_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_extreme_value_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] 
= static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::extreme_value_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_extreme_value_pdf_nvrtc_float.cpp b/test/test_extreme_value_pdf_nvrtc_float.cpp new file mode 100644 index 0000000000..adbd263489 --- /dev/null +++ b/test/test_extreme_value_pdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_extreme_value_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::extreme_value_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_extreme_value_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_extreme_value_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_extreme_value_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::extreme_value_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_extreme_value_quan_double.cu b/test/test_extreme_value_quan_double.cu new file mode 100644 index 0000000000..41f2f69a68 --- /dev/null +++ b/test/test_extreme_value_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::extreme_value_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::extreme_value_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 1000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_extreme_value_quan_float.cu b/test/test_extreme_value_quan_float.cu new file mode 100644 index 0000000000..5fe16e9a8c --- /dev/null +++ b/test/test_extreme_value_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
diff --git a/test/test_extreme_value_quan_float.cu b/test/test_extreme_value_quan_float.cu
new file mode 100644
index 0000000000..5fe16e9a8c
--- /dev/null
+++ b/test/test_extreme_value_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.  (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/distributions/extreme_value.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::extreme_value_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the quantile CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::extreme_value_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 2000.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_extreme_value_quan_nvrtc_double.cpp b/test/test_extreme_value_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..239df99949
--- /dev/null
+++ b/test/test_extreme_value_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_extreme_value_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::extreme_value_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_extreme_value_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_extreme_value_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_extreme_value_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + 
h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::extreme_value_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_extreme_value_quan_nvrtc_float.cpp b/test/test_extreme_value_quan_nvrtc_float.cpp new file mode 100644 index 0000000000..fc9d8c5f41 --- /dev/null +++ b/test/test_extreme_value_quan_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_extreme_value_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::extreme_value_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_extreme_value_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_extreme_value_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_extreme_value_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + 
h_in1[i] = static_cast<float_type>(dist(rng));
+        h_in2[i] = static_cast<float_type>(dist(rng));
+    }
+
+    checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+    checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+    checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+    checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+    checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+    int blockSize = 256;
+    int numBlocks = (numElements + blockSize - 1) / blockSize;
+    void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+    checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+    checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+    // Verify Result
+    for (int i = 0; i < numElements; ++i)
+    {
+        auto res = quantile(boost::math::extreme_value_distribution<float_type>(), h_in1[i]);
+
+        if (boost::math::isfinite(res))
+        {
+            if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+            {
+                std::cout << "error at line: " << i
+                          << "\nParallel: " << h_out[i]
+                          << "\n  Serial: " << res
+                          << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+            }
+        }
+    }
+
+    cudaFree(d_in1);
+    cudaFree(d_in2);
+    cudaFree(d_out);
+    delete[] h_in1;
+    delete[] h_in2;
+    delete[] h_out;
+
+    nvrtcDestroyProgram(&prog);
+    delete[] ptx;
+
+    cuCtxDestroy(context);
+
+    std::cout << "Kernel executed successfully." << std::endl;
+    return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_fisher_f.cpp b/test/test_fisher_f.cpp
index c18ed8ff1c..f142a33273 100644
--- a/test/test_fisher_f.cpp
+++ b/test/test_fisher_f.cpp
@@ -8,9 +8,13 @@
 // (See accompanying file LICENSE_1_0.txt
 // or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#include <boost/math/tools/test.hpp>
+#include <boost/math/tools/config.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
 using ::boost::math::concepts::real_concept;
+#endif
 
 #include <boost/math/distributions/fisher_f.hpp> // for fisher_f_distribution
 using boost::math::fisher_f_distribution;
diff --git a/test/test_fisher_f_cdf_double.cu b/test/test_fisher_f_cdf_double.cu
new file mode 100644
index 0000000000..c6d6f0a94c
--- /dev/null
+++ b/test/test_fisher_f_cdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::fisher_f_distribution(1, 1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::fisher_f_distribution(1, 1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_fisher_f_cdf_float.cu b/test/test_fisher_f_cdf_float.cu new file mode 100644 index 0000000000..9df1bc8695 --- /dev/null +++ b/test/test_fisher_f_cdf_float.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::fisher_f_distribution(1, 1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::fisher_f_distribution(1, 1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_fisher_f_cdf_nvrtc_double.cpp b/test/test_fisher_f_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..1eb9cb00f6 --- /dev/null +++ b/test/test_fisher_f_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_fisher_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::fisher_f_distribution(1, 1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_fisher_f_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_fisher_f_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_fisher_f_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::fisher_f_distribution(1, 1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_fisher_f_cdf_nvrtc_float.cpp b/test/test_fisher_f_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..244190cf19 --- /dev/null +++ b/test/test_fisher_f_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_fisher_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::fisher_f_distribution(1, 1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_fisher_f_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_fisher_f_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_fisher_f_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::fisher_f_distribution(1, 1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_fisher_f_pdf_double.cu b/test/test_fisher_f_pdf_double.cu new file mode 100644 index 0000000000..77a3b655ab --- /dev/null +++ b/test/test_fisher_f_pdf_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::fisher_f_distribution(1, 1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::fisher_f_distribution(1, 1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_fisher_f_pdf_float.cu b/test/test_fisher_f_pdf_float.cu new file mode 100644 index 0000000000..323edf3424 --- /dev/null +++ b/test/test_fisher_f_pdf_float.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::fisher_f_distribution(1, 1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::fisher_f_distribution(1, 1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_fisher_f_pdf_nvrtc_double.cpp b/test/test_fisher_f_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..8aa1482aae --- /dev/null +++ b/test/test_fisher_f_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_fisher_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::fisher_f_distribution(1, 1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_fisher_f_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_fisher_f_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_fisher_f_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::fisher_f_distribution(1, 1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_fisher_f_pdf_nvrtc_float.cpp b/test/test_fisher_f_pdf_nvrtc_float.cpp new file mode 100644 index 0000000000..e461dea9af --- /dev/null +++ b/test/test_fisher_f_pdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_fisher_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::fisher_f_distribution(1, 1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_fisher_f_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_fisher_f_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_fisher_f_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::fisher_f_distribution(1, 1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_fisher_f_quan_double.cu b/test/test_fisher_f_quan_double.cu new file mode 100644 index 0000000000..c16eb2a952 --- /dev/null +++ b/test/test_fisher_f_quan_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::fisher_f_distribution(1, 1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::fisher_f_distribution(1, 1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_fisher_f_quan_float.cu b/test/test_fisher_f_quan_float.cu new file mode 100644 index 0000000000..85cf479670 --- /dev/null +++ b/test/test_fisher_f_quan_float.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::fisher_f_distribution(1, 1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::fisher_f_distribution(1, 1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_fisher_f_quan_nvrtc_double.cpp b/test/test_fisher_f_quan_nvrtc_double.cpp new file mode 100644 index 0000000000..16ad0cbc03 --- /dev/null +++ b/test/test_fisher_f_quan_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/fisher_f.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/fisher_f.hpp>
+extern "C" __global__
+void test_fisher_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::fisher_f_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_fisher_f_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_fisher_f_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_fisher_f_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::fisher_f_distribution<float_type>(1, 1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_fisher_f_quan_nvrtc_float.cpp b/test/test_fisher_f_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..377048e526
--- /dev/null
+++ b/test/test_fisher_f_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/fisher_f.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/fisher_f.hpp>
+extern "C" __global__
+void test_fisher_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::fisher_f_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_fisher_f_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_fisher_f_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_fisher_f_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::fisher_f_distribution<float_type>(1, 1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_float_io.cpp b/test/test_float_io.cpp
index 107cc39d4a..4c1ee1760a 100644
--- a/test/test_float_io.cpp
+++ b/test/test_float_io.cpp
@@ -301,7 +301,7 @@ void test()
       std::ios_base::fixed | std::ios_base::showpos}};
 
    std::array<std::pair<std::string, std::string>, 40> string_data = {{
-#include "libs/math/test/string_data.ipp"
+#include "string_data.ipp"
   }};
 
    double num = 123456789.0;
@@ -384,10 +384,10 @@ T generate_random()
      val += gen();
   }
   e_type e;
-  val = frexp(val, &e);
+  val = std::frexp(val, &e);
   static boost::random::uniform_int_distribution<e_type> ui(0, std::numeric_limits<T>::max_exponent - 10);
-  return ldexp(val, ui(gen));
+  return std::ldexp(val, ui(gen));
 }
 
 template <class T>
diff --git a/test/test_fpclassify_nvrtc_double.cpp b/test/test_fpclassify_nvrtc_double.cpp
new file mode 100644
index 0000000000..0a99ddaa8e
--- /dev/null
+++ b/test/test_fpclassify_nvrtc_double.cpp
@@ -0,0 +1,198 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::isnan(in1[i]) +
+                 boost::math::isinf(in1[i]) +
+                 boost::math::isfinite(in1[i]) +
+                 boost::math::isnormal(in1[i]) +
+                 boost::math::fpclassify(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_gamma_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::isnan(h_in1[i]) +
+                       boost::math::isinf(h_in1[i]) +
+                       boost::math::isfinite(h_in1[i]) +
+                       boost::math::isnormal(h_in1[i]) +
+                       boost::math::fpclassify(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_fpclassify_nvrtc_float.cpp b/test/test_fpclassify_nvrtc_float.cpp
new file mode 100644
index 0000000000..04416e77d2
--- /dev/null
+++ b/test/test_fpclassify_nvrtc_float.cpp
@@ -0,0 +1,198 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::isnan(in1[i]) +
+                 boost::math::isinf(in1[i]) +
+                 boost::math::isfinite(in1[i]) +
+                 boost::math::isnormal(in1[i]) +
+                 boost::math::fpclassify(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_gamma_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::isnan(h_in1[i]) +
+                       boost::math::isinf(h_in1[i]) +
+                       boost::math::isfinite(h_in1[i]) +
+                       boost::math::isnormal(h_in1[i]) +
+                       boost::math::fpclassify(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_gamma.cpp b/test/test_gamma.cpp
index 6b2d19cca6..fb86080d72 100644
--- a/test/test_gamma.cpp
+++ b/test/test_gamma.cpp
@@ -3,7 +3,12 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#include <boost/math/special_functions/gamma.hpp>
 
 #include "test_gamma.hpp"
 //
diff --git a/test/test_gamma.hpp b/test/test_gamma.hpp
index c21573dac6..7376573ad2 100644
--- a/test/test_gamma.hpp
+++ b/test/test_gamma.hpp
@@ -4,8 +4,7 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
-
+#include <boost/math/tools/config.hpp>
 #include <boost/math/concepts/real_concept.hpp>
 #define BOOST_TEST_MAIN
@@ -13,7 +12,7 @@
 #include <boost/test/unit_test.hpp>
 #include <boost/math/tools/traits.hpp> // for has_denorm_now
 #include <boost/test/tools/floating_point_comparison.hpp>
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include <boost/math/special_functions/gamma.hpp>
 #include <boost/math/constants/constants.hpp>
 #include <boost/array.hpp>
@@ -320,11 +319,13 @@ void test_spots(T, const char* name)
       BOOST_CHECK(sign == -1);
    }
 
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
    if(boost::math::detail::has_denorm_now<T>() && std::numeric_limits<T>::has_infinity && (boost::math::isinf)(1 / std::numeric_limits<T>::denorm_min()))
    {
      BOOST_CHECK_EQUAL(boost::math::tgamma(-std::numeric_limits<T>::denorm_min()), -std::numeric_limits<T>::infinity());
     BOOST_CHECK_EQUAL(boost::math::tgamma(std::numeric_limits<T>::denorm_min()), std::numeric_limits<T>::infinity());
   }
+   #endif
    //
    // Extra large values for lgamma, see https://github.com/boostorg/math/issues/242
    //
diff --git a/test/test_gamma_dist.cpp b/test/test_gamma_dist.cpp
index b7776c79cb..2b1a181f33 100644
--- a/test/test_gamma_dist.cpp
+++ b/test/test_gamma_dist.cpp
@@ -15,16 +15,23 @@
 // From MathWorld--A Wolfram Web Resource.
 // http://mathworld.wolfram.com/GammaDistribution.html
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch.hpp> // include directory libs/math/src/tr1/ is needed.
+#endif
+
+#include <boost/math/tools/config.hpp>
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // Boost.Test
 #include <boost/test/tools/floating_point_comparison.hpp>
 
 #include <boost/math/distributions/gamma.hpp>
     using boost::math::gamma_distribution;
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include "test_out_of_range.hpp"
 
 #include <iostream>
diff --git a/test/test_gamma_dist_cdf_double.cu b/test/test_gamma_dist_cdf_double.cu
new file mode 100644
index 0000000000..4777196aa1
--- /dev/null
+++ b/test/test_gamma_dist_cdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::gamma_distribution<float_type>(1, 1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_gamma_dist_cdf_float.cu b/test/test_gamma_dist_cdf_float.cu
new file mode 100644
index 0000000000..a93aca3950
--- /dev/null
+++ b/test/test_gamma_dist_cdf_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::gamma_distribution<float_type>(1, 1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_gamma_dist_cdf_nvrtc_double.cpp b/test/test_gamma_dist_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..3e911f4e05
--- /dev/null
+++ b/test/test_gamma_dist_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/gamma.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/gamma.hpp>
+extern "C" __global__
+void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_gamma_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::gamma_distribution<float_type>(1, 1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_gamma_dist_cdf_nvrtc_float.cpp b/test/test_gamma_dist_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..17762d4066
--- /dev/null
+++ b/test/test_gamma_dist_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/gamma.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/gamma.hpp>
+extern "C" __global__
+void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_gamma_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::gamma_distribution<float_type>(1, 1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_gamma_dist_pdf_double.cu b/test/test_gamma_dist_pdf_double.cu
new file mode 100644
index 0000000000..a8411d5b6d
--- /dev/null
+++ b/test/test_gamma_dist_pdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::gamma_distribution<float_type>(1, 1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_gamma_dist_pdf_float.cu b/test/test_gamma_dist_pdf_float.cu
new file mode 100644
index 0000000000..6ab3247acb
--- /dev/null
+++ b/test/test_gamma_dist_pdf_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::gamma_distribution<float_type>(1, 1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_gamma_dist_pdf_nvrtc_double.cpp b/test/test_gamma_dist_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..1faae99866
--- /dev/null
+++ b/test/test_gamma_dist_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/gamma.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/gamma.hpp>
+extern "C" __global__
+void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_gamma_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::gamma_distribution<float_type>(1, 1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_gamma_dist_pdf_nvrtc_float.cpp b/test/test_gamma_dist_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..054ddbbadc
--- /dev/null
+++ b/test/test_gamma_dist_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::gamma_distribution(1, 1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_gamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::gamma_distribution(1, 1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_gamma_dist_quan_double.cu b/test/test_gamma_dist_quan_double.cu new file mode 100644 index 0000000000..d29bf6d6be --- /dev/null +++ b/test/test_gamma_dist_quan_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <exception>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/distributions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        // Initialize the input vector with uniform random variates
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Compute the same quantiles serially to verify the device results
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::gamma_distribution<float_type>(1, 1), input_vector1[i]));
+        double t = w.elapsed();
+
+        // Check the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
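The timing in these .cu tests comes from the test-local header "stopwatch.hpp", which is not part of this diff. Judging only from the usage above (watch w; w.elapsed(); w.reset();), a minimal stand-in could look like the sketch below; the interface is inferred from the call sites, and the shipped header may differ.

#include <chrono>

// Hypothetical stand-in for the `watch` helper used by the CUDA tests:
// starts timing on construction, reports elapsed seconds as a double,
// and restarts the clock on reset().
class watch
{
    std::chrono::steady_clock::time_point start_ = std::chrono::steady_clock::now();

public:
    double elapsed() const
    {
        return std::chrono::duration<double>(std::chrono::steady_clock::now() - start_).count();
    }

    void reset() { start_ = std::chrono::steady_clock::now(); }
};

diff --git a/test/test_gamma_dist_quan_float.cu b/test/test_gamma_dist_quan_float.cu
new file mode 100644
index 0000000000..58aa42e90f
--- /dev/null
+++ b/test/test_gamma_dist_quan_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file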
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <exception>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/distributions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        // Initialize the input vector with uniform random variates
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Compute the same quantiles serially to verify the device results
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::gamma_distribution<float_type>(1, 1), input_vector1[i]));
+        double t = w.elapsed();
+
+        // Check the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_gamma_dist_quan_nvrtc_double.cpp b/test/test_gamma_dist_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..132efcd6c1
--- /dev/null
+++ b/test/test_gamma_dist_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::gamma_distribution(1, 1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_gamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::gamma_distribution(1, 1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_gamma_dist_quan_nvrtc_float.cpp b/test/test_gamma_dist_quan_nvrtc_float.cpp new file mode 100644 index 0000000000..7749523abc --- /dev/null +++ b/test/test_gamma_dist_quan_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::gamma_distribution(1, 1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_gamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::gamma_distribution(1, 1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_gamma_nvrtc_double.cpp b/test/test_gamma_nvrtc_double.cpp new file mode 100644 index 0000000000..9fe2933720 --- /dev/null +++ b/test/test_gamma_nvrtc_double.cpp @@ -0,0 +1,186 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +extern "C" __global__ +void test_gamma_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::tgamma(in1[i]) + boost::math::lgamma(in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_gamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + 
checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::tgamma(h_in1[i]) + boost::math::lgamma(h_in2[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_gamma_nvrtc_float.cpp b/test/test_gamma_nvrtc_float.cpp new file mode 100644 index 0000000000..5d34b130ad --- /dev/null +++ b/test/test_gamma_nvrtc_float.cpp @@ -0,0 +1,186 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +extern "C" __global__ +void test_gamma_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::tgamma(in1[i]) + boost::math::lgamma(in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_gamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/math/boost-root/libs/math/include/"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, 
numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::tgamma(h_in1[i]) + boost::math::lgamma(h_in2[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_gamma_p_derivative_double.cu b/test/test_gamma_p_derivative_double.cu new file mode 100644 index 0000000000..566bc1657f --- /dev/null +++ b/test/test_gamma_p_derivative_double.cu @@ -0,0 +1,104 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::gamma_p_derivative(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand() / (float_type)RAND_MAX;
+        input_vector2[i] = rand() / (float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Compute the same values serially to verify the device results
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::gamma_p_derivative(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+
+    // Check the results
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
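The inputs and outputs above live in CUDA unified memory via the test-local "cuda_managed_ptr.hpp", which this diff does not include. From the usage alone (construction with an element count, get() for the raw pointer, operator[] from host code), a plausible minimal sketch over cudaMallocManaged follows; the interface is an assumption inferred from the call sites, not the actual header.

#include <cuda_runtime.h>
#include <cstddef>

// Assumed shape of cuda_managed_ptr<T>: a non-copyable owner of
// cudaMallocManaged storage that host code can index directly and
// device code can receive as a raw pointer via get().
template <typename T>
class cuda_managed_ptr
{
    T* data_ = nullptr;

public:
    explicit cuda_managed_ptr(std::size_t n) { cudaMallocManaged(&data_, n * sizeof(T)); }
    ~cuda_managed_ptr() { cudaFree(data_); }

    cuda_managed_ptr(const cuda_managed_ptr&) = delete;
    cuda_managed_ptr& operator=(const cuda_managed_ptr&) = delete;

    T* get() const { return data_; }
    T& operator[](std::size_t i) { return data_[i]; }
    const T& operator[](std::size_t i) const { return data_[i]; }
};

diff --git a/test/test_gamma_p_derivative_float.cu b/test/test_gamma_p_derivative_float.cu
new file mode 100644
index 0000000000..f9fd52a50c
--- /dev/null
+++ b/test/test_gamma_p_derivative_float.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file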
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::gamma_p_derivative(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand() / (float_type)RAND_MAX;
+        input_vector2[i] = rand() / (float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Compute the same values serially to verify the device results
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::gamma_p_derivative(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+
+    // Check the results
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_gamma_p_derivative_nvrtc_double.cpp b/test/test_gamma_p_derivative_nvrtc_double.cpp
new file mode 100644
index 0000000000..53a752c2df
--- /dev/null
+++ b/test/test_gamma_p_derivative_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_gamma_p_derivative_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::gamma_p_derivative(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_p_derivative_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_gamma_p_derivative_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_p_derivative_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + 
h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::gamma_p_derivative(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_gamma_p_derivative_nvrtc_float.cpp b/test/test_gamma_p_derivative_nvrtc_float.cpp new file mode 100644 index 0000000000..da9c50855b --- /dev/null +++ b/test/test_gamma_p_derivative_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_gamma_p_derivative_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::gamma_p_derivative(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_p_derivative_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_gamma_p_derivative_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_p_derivative_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + 
h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::gamma_p_derivative(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_gamma_p_inv_double.cu b/test/test_gamma_p_inv_double.cu new file mode 100644 index 0000000000..4392f37d38 --- /dev/null +++ b/test/test_gamma_p_inv_double.cu @@ -0,0 +1,108 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <random>
+#include <cstdlib>
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::gamma_p_inv(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    std::mt19937_64 gen(42);
+    std::uniform_real_distribution<float_type> dist(0, 1);
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+        input_vector2[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Compute the same values serially to verify the device results
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::gamma_p_inv(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+
+    // Check the results
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 1000)
+        {
+            std::cerr << "Result verification failed at element " << i << "!\n"
+                      << "Error found was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_gamma_p_inv_float.cu b/test/test_gamma_p_inv_float.cu
new file mode 100644
index 0000000000..70033686c1
--- /dev/null
+++ b/test/test_gamma_p_inv_float.cu
@@ -0,0 +1,107 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
diff --git a/test/test_gamma_p_inv_float.cu b/test/test_gamma_p_inv_float.cu
new file mode 100644
index 0000000000..70033686c1
--- /dev/null
+++ b/test/test_gamma_p_inv_float.cu
@@ -0,0 +1,107 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <random>
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::gamma_p_inv(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    std::mt19937_64 gen(42);
+    std::uniform_real_distribution<float_type> dist(0, 1);
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+        input_vector2[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::gamma_p_inv(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
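The .cu tests above launch through the runtime API's triple-chevron syntax, while the NVRTC tests that follow compile the kernel source at run time and launch the resulting PTX through the driver API. The two launches are equivalent; the driver call simply spells out the grid and block dimensions and passes the kernel arguments as an array of pointers. A sketch of the correspondence, using the names that appear in the tests below:

    // Runtime API (.cu tests):
    //     cuda_test<<<blocksPerGrid, threadsPerBlock>>>(in1, in2, out, numElements);
    // Driver API (NVRTC tests), with 'kernel' obtained from cuModuleGetFunction:
    void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
    cuLaunchKernel(kernel,
                   numBlocks, 1, 1,   // grid dimensions
                   blockSize, 1, 1,   // block dimensions
                   0,                 // dynamic shared memory (bytes)
                   nullptr,           // default stream
                   args, nullptr);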
diff --git a/test/test_gamma_p_inv_nvrtc_double.cpp b/test/test_gamma_p_inv_nvrtc_double.cpp
new file mode 100644
index 0000000000..d270dbf901
--- /dev/null
+++ b/test/test_gamma_p_inv_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/gamma.hpp>
+extern "C" __global__
+void test_gamma_p_inv_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::gamma_p_inv(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_p_inv_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_gamma_p_inv_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_p_inv_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::gamma_p_inv(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_gamma_p_inv_nvrtc_float.cpp b/test/test_gamma_p_inv_nvrtc_float.cpp
new file mode 100644
index 0000000000..7c844eb682
--- /dev/null
+++ b/test/test_gamma_p_inv_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_gamma_p_inv_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::gamma_p_inv(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_p_inv_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_gamma_p_inv_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_p_inv_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::gamma_p_inv(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_gegenbauer_double.cu b/test/test_gegenbauer_double.cu new file mode 100644 index 0000000000..21278d7a82 --- /dev/null +++ b/test/test_gegenbauer_double.cu @@ -0,0 +1,125 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Gegenbauer prime uses all methods internally so it's the easy choice
+
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <vector>
+#include <boost/math/special_functions/gegenbauer.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::gegenbauer_prime(2, in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.resize(numElements); // resize (not reserve) so the indexed assignment below is well defined
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results[i] = boost::math::gegenbauer_prime(2, input_vector1[i], input_vector2[i]);
+    double t = w.elapsed();
+    // check the results
+    int failure_counter = 0;
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
+            // Most elements are under 50 but extremely small numbers vary more greatly
+            if (eps > 1000)
+            {
+                std::cerr << "Result verification failed at element " << i << "!\n"
+                          << "Device: " << output_vector[i]
+                          << "\n  Host: " << results[i]
+                          << "\n   Eps: " << eps << std::endl;
+                ++failure_counter;
+                if (failure_counter > 100)
+                {
+                    break;
+                }
+            }
+        }
+    }
+
+    if (failure_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
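The watch object used for timing above comes from the stopwatch.hpp test helper. A std::chrono-based equivalent (an assumption about its shape, shown only so the reset()/elapsed() calls are readable) might look like:

    // Hypothetical sketch of the 'watch' stopwatch helper -- the real header may differ.
    #include <chrono>

    class watch_sketch
    {
        std::chrono::steady_clock::time_point start_ = std::chrono::steady_clock::now();
    public:
        void reset() { start_ = std::chrono::steady_clock::now(); }
        double elapsed() const // seconds since construction or the last reset()
        {
            return std::chrono::duration<double>(std::chrono::steady_clock::now() - start_).count();
        }
    };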
diff --git a/test/test_gegenbauer_float.cu b/test/test_gegenbauer_float.cu
new file mode 100644
index 0000000000..b7affaecd4
--- /dev/null
+++ b/test/test_gegenbauer_float.cu
@@ -0,0 +1,124 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Gegenbauer prime uses all methods internally so it's the easy choice
+
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <vector>
+#include <boost/math/special_functions/gegenbauer.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::gegenbauer_prime(2, in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.resize(numElements); // resize (not reserve) so the indexed assignment below is well defined
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results[i] = boost::math::gegenbauer_prime(2, input_vector1[i], input_vector2[i]);
+    double t = w.elapsed();
+    // check the results
+    int failure_counter = 0;
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
+            if (eps > 1000)
+            {
+                std::cerr << "Result verification failed at element " << i << "!\n"
+                          << "Device: " << output_vector[i]
+                          << "\n  Host: " << results[i]
+                          << "\n   Eps: " << eps << std::endl;
+                ++failure_counter;
+                if (failure_counter > 100)
+                {
+                    break;
+                }
+            }
+        }
+    }
+
+    if (failure_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_gegenbauer_nvrtc_double.cpp b/test/test_gegenbauer_nvrtc_double.cpp
new file mode 100644
index 0000000000..0c8416cb61
--- /dev/null
+++ b/test/test_gegenbauer_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_gegenbauer_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::gegenbauer(2, in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gegenbauer_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_gegenbauer_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_gegenbauer_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::gegenbauer(2, h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_gegenbauer_nvrtc_float.cpp b/test/test_gegenbauer_nvrtc_float.cpp new file mode 100644 index 0000000000..c0d3484175 --- /dev/null +++ b/test/test_gegenbauer_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_gegenbauer_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::gegenbauer(2, in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gegenbauer_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_gegenbauer_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_gegenbauer_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::gegenbauer(2, h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_geometric.cpp b/test/test_geometric.cpp
index 928a2aa0ed..13a9e090b9 100644
--- a/test/test_geometric.cpp
+++ b/test/test_geometric.cpp
@@ -26,9 +26,14 @@
 # define TEST_REAL_CONCEPT
 #endif
 
-#include <boost/math/tools/test.hpp>
+#include <boost/math/tools/config.hpp>
+
+#include "../include_private/boost/math/tools/test.hpp"
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
 using ::boost::math::concepts::real_concept;
+#endif
 
 #include <boost/math/distributions/geometric.hpp> // for geometric_distribution
 using boost::math::geometric_distribution;
@@ -64,7 +69,11 @@ void test_spot( // Test a single spot value against 'known good' values.
       RealType tol,     // Test tolerance
       RealType logtol)  // Logcdf Test tolerance.
 {
-  BOOST_IF_CONSTEXPR (std::is_same<RealType, long double>::value || std::is_same<RealType, real_concept>::value)
+  BOOST_IF_CONSTEXPR (std::is_same<RealType, long double>::value
+                      #ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
+                      || std::is_same<RealType, real_concept>::value
+                      #endif
+                      )
   {
     logtol *= 100;
   }
@@ -376,7 +385,9 @@ if(std::numeric_limits<RealType>::is_specialized)
      static_cast<RealType>(9.9000000000003448e-201L), //
      100 * tolerance); // Note difference
 
-    // p nearer unity.
+    // p nearer unity.
+    // On GPU this gets flushed to 0 which has an eps difference of 3.4e+38
+    #ifndef BOOST_MATH_HAS_GPU_SUPPORT
     BOOST_CHECK_CLOSE_FRACTION( //
       pdf(geometric_distribution<RealType>(static_cast<RealType>(0.9999)),
       static_cast<RealType>(10) ),  // Number of failures, k
      // static_cast<RealType>(1.00156406e-040)
      static_cast<RealType>(9.999e-41), // exact from 100 digit calculator.
      2e3 * tolerance); // Note bigger tolerance needed.
+    #endif
 
     // Moshier Cephes 100 digits calculator says 9.999e-41
     //0.9999*pow(1-0.9999,10)
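The guard added above exists because 9.999e-41 is subnormal in single precision, and GPU builds commonly flush subnormals to zero, at which point a ULP-style comparison reports an enormous distance (the comment in the diff measured roughly 3.4e+38 eps). A quick standalone host-side illustration of the failure mode (not part of the test suite):

    #include <boost/math/special_functions/relative_difference.hpp>
    #include <iostream>

    int main()
    {
        float exact   = 9.999e-41f; // subnormal as a float
        float flushed = 0.0f;       // what a flush-to-zero device returns
        // The reported ULP distance between a subnormal and zero is huge,
        // which is why the spot check is compiled out on GPU builds.
        std::cout << boost::math::epsilon_difference(exact, flushed) << std::endl;
    }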
diff --git a/test/test_geometric_dist_cdf_double.cu b/test/test_geometric_dist_cdf_double.cu
new file mode 100644
index 0000000000..98b6510ad1
--- /dev/null
+++ b/test/test_geometric_dist_cdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch geometric distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::geometric_distribution<float_type>(0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_geometric_dist_cdf_float.cu b/test/test_geometric_dist_cdf_float.cu
new file mode 100644
index 0000000000..2662ac07c5
--- /dev/null
+++ b/test/test_geometric_dist_cdf_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch geometric distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::geometric_distribution<float_type>(0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!"
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_geometric_dist_cdf_nvrtc_double.cpp b/test/test_geometric_dist_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..f8c5ed5aad --- /dev/null +++ b/test/test_geometric_dist_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_geometric_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::geometric_distribution(0.5), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_geometric_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_geometric_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX 
from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_geometric_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::geometric_distribution(0.5), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_geometric_dist_cdf_nvrtc_float.cpp b/test/test_geometric_dist_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..a53cd0d972 --- /dev/null +++ b/test/test_geometric_dist_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_geometric_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::geometric_distribution(0.5), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_geometric_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_geometric_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_geometric_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::geometric_distribution(0.5), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_geometric_dist_pdf_double.cu b/test/test_geometric_dist_pdf_double.cu new file mode 100644 index 0000000000..03d2dc0078 --- /dev/null +++ b/test/test_geometric_dist_pdf_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch geometric distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::geometric_distribution<float_type>(0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
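The geometric pdf exercised here has the closed form p(1-p)^k (the probability of k failures before the first success), so the host-side reference loop is easy to sanity-check by hand; for example p = 0.5, k = 3 gives 0.0625. A quick standalone check (not part of the patch):

    #include <boost/math/distributions/geometric.hpp>
    #include <cmath>
    #include <iostream>

    int main()
    {
        double p = 0.5, k = 3;
        double by_hand = p * std::pow(1.0 - p, k);                         // 0.0625
        double by_lib  = pdf(boost::math::geometric_distribution<>(p), k); // matches
        std::cout << by_hand << ' ' << by_lib << std::endl;
    }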
diff --git a/test/test_geometric_dist_pdf_float.cu b/test/test_geometric_dist_pdf_float.cu
new file mode 100644
index 0000000000..1034d122b5
--- /dev/null
+++ b/test/test_geometric_dist_pdf_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch geometric distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::geometric_distribution<float_type>(0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_geometric_dist_pdf_nvrtc_double.cpp b/test/test_geometric_dist_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..8a6b5756e6
--- /dev/null
+++ b/test/test_geometric_dist_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_geometric_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_geometric_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_geometric_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_geometric_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::geometric_distribution<float_type>(0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_geometric_dist_pdf_nvrtc_float.cpp b/test/test_geometric_dist_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..dfb05105dd
--- /dev/null
+++ b/test/test_geometric_dist_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_geometric_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_geometric_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_geometric_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_geometric_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::geometric_distribution<float_type>(0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_geometric_dist_quan_double.cu b/test/test_geometric_dist_quan_double.cu
new file mode 100644
index 0000000000..fcac938e5a
--- /dev/null
+++ b/test/test_geometric_dist_quan_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vector
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the geometric distribution quantile CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch geometric distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::geometric_distribution<float_type>(0.5), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_geometric_dist_quan_float.cu b/test/test_geometric_dist_quan_float.cu
new file mode 100644
index 0000000000..0749522021
--- /dev/null
+++ b/test/test_geometric_dist_quan_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vector
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the geometric distribution quantile CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch geometric distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::geometric_distribution<float_type>(0.5), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 1000.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_geometric_dist_quan_nvrtc_double.cpp b/test/test_geometric_dist_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..52b2e97ec4
--- /dev/null
+++ b/test/test_geometric_dist_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_geometric_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_geometric_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_geometric_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_geometric_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::geometric_distribution<float_type>(0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_geometric_dist_quan_nvrtc_float.cpp b/test/test_geometric_dist_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..a83cf857e8
--- /dev/null
+++ b/test/test_geometric_dist_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_geometric_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_geometric_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_geometric_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_geometric_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::geometric_distribution<float_type>(0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_hankel.cpp b/test/test_hankel.cpp
index f8bd173da8..a93e90c4d1 100644
--- a/test/test_hankel.cpp
+++ b/test/test_hankel.cpp
@@ -3,9 +3,13 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
 #define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#include <boost/math/tools/config.hpp>
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
 #include <boost/test/tools/floating_point_comparison.hpp>
@@ -85,6 +89,7 @@ void test_hankel(T, const char* name)
 //
 // Instantiate a few instances to check our error handling code can cope with std::complex:
 //
+#ifndef SYCL_LANGUAGE_VERSION
 typedef boost::math::policies::policy<
    boost::math::policies::overflow_error<boost::math::policies::ignore_error>,
    boost::math::policies::denorm_error<boost::math::policies::ignore_error>,
@@ -120,7 +125,7 @@ typedef boost::math::policies::policy<
    boost::math::policies::indeterminate_result_error<boost::math::policies::ignore_error>
 > pol3;
 template std::complex<double> boost::math::cyl_hankel_1<double, double, pol3>(double, double, const pol3&);
-
+#endif
 
 BOOST_AUTO_TEST_CASE( test_main )
 {
diff --git a/test/test_hermite.cpp b/test/test_hermite.cpp
index d1127feec2..60dafdb8f1 100644
--- a/test/test_hermite.cpp
+++ b/test/test_hermite.cpp
@@ -5,8 +5,15 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
-#include"test_hermite.hpp"
+#endif
+
+#ifndef BOOST_MATH_OVERFLOW_ERROR_POLICY
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#endif
+
+#include "test_hermite.hpp"
 
 //
 // DESCRIPTION:
diff --git a/test/test_hermite.hpp b/test/test_hermite.hpp
index 0b00677eec..8f7c55ff10 100644
--- a/test/test_hermite.hpp
+++ b/test/test_hermite.hpp
@@ -11,11 +11,17 @@
 // Constants are too big for float case, but this doesn't matter for test.
 #endif
 
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp>
+#endif
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/math/special_functions/hermite.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
 #include <boost/math/constants/constants.hpp>
 #include <boost/array.hpp>
 #include "functor.hpp"
diff --git a/test/test_hermite_double.cu b/test/test_hermite_double.cu
new file mode 100644
index 0000000000..a53766171a
--- /dev/null
+++ b/test/test_hermite_double.cu
@@ -0,0 +1,120 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cmath>
+#include <cstdlib>
+#include <boost/math/special_functions/hermite.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::hermite(1U, in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Hermite polynomial CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::hermite(1U, input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    int fail_counter = 0;
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 1000)
+            {
+                std::cerr << "Result verification failed at element " << i << "!\n"
+                          << "Device: " << output_vector[i] << '\n'
+                          << "  Host: " << results[i] << '\n'
+                          << "   Eps: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl;
+                fail_counter++;
+                if (fail_counter > 100)
+                {
+                    break;
+                }
+            }
+        }
+    }
+
+    if (fail_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_hermite_float.cu b/test/test_hermite_float.cu
new file mode 100644
index 0000000000..c48560bbe5
--- /dev/null
+++ b/test/test_hermite_float.cu
@@ -0,0 +1,120 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cmath>
+#include <cstdlib>
+#include <boost/math/special_functions/hermite.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::hermite(1U, in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Hermite polynomial CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::hermite(1U, input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    int fail_counter = 0;
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 1000)
+            {
+                std::cerr << "Result verification failed at element " << i << "!\n"
+                          << "Device: " << output_vector[i] << '\n'
+                          << "  Host: " << results[i] << '\n'
+                          << "   Eps: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl;
+                fail_counter++;
+                if (fail_counter > 100)
+                {
+                    break;
+                }
+            }
+        }
+    }
+
+    if (fail_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_hermite_nvrtc_double.cpp b/test/test_hermite_nvrtc_double.cpp
new file mode 100644
index 0000000000..569d975cb6
--- /dev/null
+++ b/test/test_hermite_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <cmath>
+#include <boost/math/special_functions/hermite.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/hermite.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_hermite_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::hermite(1U, in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_hermite_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_hermite_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
{"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_hermite_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::hermite(1U, h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_hermite_nvrtc_float.cpp b/test/test_hermite_nvrtc_float.cpp new file mode 100644 index 0000000000..e2e907c519 --- /dev/null +++ b/test/test_hermite_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. 
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <cmath>
+#include <boost/math/special_functions/hermite.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/hermite.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_hermite_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::hermite(1U, in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_hermite_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_hermite_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_hermite_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::hermite(1U, h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_heuman_lambda.cpp b/test/test_heuman_lambda.cpp
index 83709c635b..cdcf39aa68 100644
--- a/test/test_heuman_lambda.cpp
+++ b/test/test_heuman_lambda.cpp
@@ -4,7 +4,10 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
 #include "test_heuman_lambda.hpp"
 
 //
diff --git a/test/test_heuman_lambda.hpp b/test/test_heuman_lambda.hpp
index 23720b2d02..6081dac482 100644
--- a/test/test_heuman_lambda.hpp
+++ b/test/test_heuman_lambda.hpp
@@ -8,11 +8,17 @@
 // Constants are too big for float case, but this doesn't matter for test.
 #endif
 
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp>
+#endif
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/math/special_functions/heuman_lambda.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
 #include <boost/math/constants/constants.hpp>
 #include <boost/array.hpp>
 #include "functor.hpp"
diff --git a/test/test_heuman_lambda_double.cu b/test/test_heuman_lambda_double.cu
new file mode 100644
index 0000000000..361dbe8051
--- /dev/null
+++ b/test/test_heuman_lambda_double.cu
@@ -0,0 +1,120 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cmath>
+#include <cstdlib>
+#include <boost/math/special_functions/heuman_lambda.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::heuman_lambda(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Heuman lambda CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::heuman_lambda(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    int fail_counter = 0;
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 200)
+            {
+                std::cerr << "Result verification failed at element " << i << "!\n"
+                          << "Device: " << output_vector[i] << '\n'
+                          << "  Host: " << results[i] << '\n'
+                          << "   Eps: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl;
+                fail_counter++;
+                if (fail_counter > 100)
+                {
+                    break;
+                }
+            }
+        }
+    }
+
+    if (fail_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_heuman_lambda_float.cu b/test/test_heuman_lambda_float.cu
new file mode 100644
index 0000000000..361dbe8051
--- /dev/null
+++ b/test/test_heuman_lambda_float.cu
@@ -0,0 +1,120 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cmath>
+#include <cstdlib>
+#include <boost/math/special_functions/heuman_lambda.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::heuman_lambda(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Heuman lambda CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::heuman_lambda(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    int fail_counter = 0;
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 200)
+            {
+                std::cerr << "Result verification failed at element " << i << "!\n"
+                          << "Device: " << output_vector[i] << '\n'
+                          << "  Host: " << results[i] << '\n'
+                          << "   Eps: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl;
+                fail_counter++;
+                if (fail_counter > 100)
+                {
+                    break;
+                }
+            }
+        }
+    }
+
+    if (fail_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_heumann_lambda_nvrtc_double.cpp b/test/test_heumann_lambda_nvrtc_double.cpp
new file mode 100644
index 0000000000..38c762fd51
--- /dev/null
+++ b/test/test_heumann_lambda_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/heuman_lambda.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/heuman_lambda.hpp>
+extern "C" __global__
+void test_heuman_lambda_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::heuman_lambda(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_heuman_lambda_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_heuman_lambda_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_heuman_lambda_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] =
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::heuman_lambda(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_heumann_lambda_nvrtc_float.cpp b/test/test_heumann_lambda_nvrtc_float.cpp new file mode 100644 index 0000000000..5139b9d6f6 --- /dev/null +++ b/test/test_heumann_lambda_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/heuman_lambda.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/heuman_lambda.hpp>
+extern "C" __global__
+void test_heuman_lambda_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::heuman_lambda(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_heuman_lambda_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_heuman_lambda_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_heuman_lambda_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] =
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::heuman_lambda(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_holtsmark.cpp b/test/test_holtsmark.cpp new file mode 100644 index 0000000000..93a40924d6 --- /dev/null +++ b/test/test_holtsmark.cpp @@ -0,0 +1,917 @@ +// Copyright Takuma Yoshimura 2024. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_TEST_MAIN
+#define BOOST_TEST_MODULE StatsHoltsmarkTest
+#include <boost/test/included/unit_test.hpp>
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/tools/big_constant.hpp>
+
+#include <iostream>
+
+#if __has_include(<stdfloat>)
+# include <stdfloat>
+#endif
+
+#ifdef __clang__
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Woverflow"
+#endif
+
+using boost::math::holtsmark_distribution;
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+#include <boost/multiprecision/cpp_bin_float.hpp>
+using boost::multiprecision::cpp_bin_float_quad;
+#endif
+
+template <typename RealType, int N>
+void do_test_holtsmark_pdf(){
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy [%]: " << tolerance << std::endl;
+
+    holtsmark_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.36729417918039395222067998266923903487897550760740e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.65389736963758327689008908803579458127136270822821e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.02515191704410688567167143509210415364664018836038e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.51083986231955529936787758130352472694082331202869e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.15094236163249353135030241188004077293096105502542e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.99750209903363198419241505065146206315152726747464e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.11488945306717663129360225856869217115733169200098e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.57635410598104651856363821355027691095093972951943e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.45396231261375200568114750897618690566092315194568e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.56644599840900478087175884712634478003230341866094e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.07991505579130717014680432847812811882295188855215e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21504440259916207727077397273468920426729181666284e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.36133628073378183373326886775069575640127303211029e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.51745548085348400860371488668500734429223868343929e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.68134487107062900924723590620591092812119992658420e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.85018067925573560771430043931430243630326746823000e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1)),
BOOST_MATH_BIG_CONSTANT(RealType, N, 2.02038159607840130388931544845552929991729709746772e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.10468477092312109723487937526691724501188944561469e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.18768434960425041116444711570747254236793293668156e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.26876533945661456653252880545328465490361724757011e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.34729279420630671501163324236373491976559833675371e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.42262031889641529582832839126322520342094549884605e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.49409942058625893002692983261905908575520500298169e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.56108951988457608160176526934999395388199046235972e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.62296840354090035789597147663858548813023471783846e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.67914286936215258107106284322230987052088197806732e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.72905929336501384188469491706325278582273134014420e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.77221383677408177290219591627664575751370295766362e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.80816200831657887972174538033323818453141437738815e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.83652730587590691770044766018047580182775092180058e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.85700868106012048340895194764008089858950037497619e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.86938660017923959009561433879636172948429661600328e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.87352751452164445024482162286994868261727837966217e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.86938660017923959009561433879636172948429661600328e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.85700868106012048340895194764008089858950037497619e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.83652730587590691770044766018047580182775092180058e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.80816200831657887972174538033323818453141437738815e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.77221383677408177290219591627664575751370295766362e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
2.72905929336501384188469491706325278582273134014420e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.67914286936215258107106284322230987052088197806732e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.62296840354090035789597147663858548813023471783846e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.56108951988457608160176526934999395388199046235972e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.49409942058625893002692983261905908575520500298169e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.42262031889641529582832839126322520342094549884605e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.34729279420630671501163324236373491976559833675371e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.26876533945661456653252880545328465490361724757011e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.18768434960425041116444711570747254236793293668156e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.10468477092312109723487937526691724501188944561469e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.02038159607840130388931544845552929991729709746772e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.85018067925573560771430043931430243630326746823000e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.68134487107062900924723590620591092812119992658420e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.51745548085348400860371488668500734429223868343929e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.36133628073378183373326886775069575640127303211029e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21504440259916207727077397273468920426729181666284e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.07991505579130717014680432847812811882295188855215e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.56644599840900478087175884712634478003230341866094e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.45396231261375200568114750897618690566092315194568e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.57635410598104651856363821355027691095093972951943e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.11488945306717663129360225856869217115733169200098e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.99750209903363198419241505065146206315152726747464e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.15094236163249353135030241188004077293096105502542e-2), tolerance); + 
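+    // Sanity anchor for the spot values above: the standard Holtsmark
+    // distribution is the symmetric alpha-stable law with alpha = 3/2, whose
+    // density at the mode is pdf(0) = Gamma(5/3) / pi ~= 2.8735275145e-1,
+    // matching the x = 0 entry checked earlier.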
BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.51083986231955529936787758130352472694082331202869e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.02515191704410688567167143509210415364664018836038e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.65389736963758327689008908803579458127136270822821e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.36729417918039395222067998266923903487897550760740e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.66754576694881156512310862711445437434536539665220e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.11173604765480684115169149814306652067264621028219e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.40661030133408839114013026681116038222350468593972e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.22346322254737897976061662951210609505224644527458e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.37441612177611972649583292419049409747033180591176e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.74744460065068339118467750880557414156817094839507e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(7.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.27304026309950351561235691054710976330201642992015e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(8)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.90649774685568282389553481307707005424869182652166e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(9)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38767874243521145729361398147898730301527980427894e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(10)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.04777602492944046116536405570429208835346612226751e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(11)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.14427312838534546510639939284636768866987456541955e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(12)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.48125490715334110982111302156190976484065179596964e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(13)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.25926848543648125740563360613612653857829343973075e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(14)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.33819608589296696976594994753563070519825392999328e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(15)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.62882342819480981578669644070664737082208227167328e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(16)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.07231582988207590928480356376941073734041404814646e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(18)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.27006701837892764913571864450685488819589225207004e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(20)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.73366906892470965030093227280098921046549874789447e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(22)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.35957320021440204694920969306620767773792096574145e-4), 
tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(24)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.08962051920666002556891360954800856160496745325982e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(26)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.89257005093845424113694708405490444536741708307432e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(28)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.36985573727704971748849427466806366268717813920884e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(30)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.18908057108330090847508167263164576897080536147998e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(32)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.25741312407933720816582583160953651639222885045617e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(36)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.90521803068812235229266578780465914597024834788499e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(40)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.99440098605222381975051163529379005706849379136631e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(44)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.35556983515831713649398537010595127693244595946631e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(48)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.89252591391344813399503397931059007207167187335384e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(52)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.54761819718387004182264813450582914982582239574760e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(56)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28473144505453904388319480756963924103363111590946e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(60)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.08038477619830474574170723303117982155643976342020e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(64)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.18821097500148811906668422616947735567544539343910e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(72)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.83770756909753597744679176403606964363170824955403e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(80)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.25030682765210472090258978149127752580598842866547e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(88)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.13470581474698689445958562292807701026370826971490e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(96)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.32481553840568432886657947564022812389544240910245e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(104)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.72079249234488105623103527050493983216840056389979e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(112)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.25993583492605042781181768463472827114252734385354e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(120)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.90139785097711606987697985776461422964227374409867e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(128)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.61772178749879746562419160426660201168856327635754e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(256)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.85568203412051586615998003517909621954036193765824e-7), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(512)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
5.04563780918059698346140819518179336362098891429611e-8), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1024)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.91792339891869109849236922708731694322153177924824e-9), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 11)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.57638176923363231335450711092364627257129748508825e-9), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 12)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.78661356398095539721893167504764083609118624639856e-10), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 13)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.92604459926540711296353370623384343787650705186643e-11), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 14)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.70807462044367666228538283653300633948804226008283e-12), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 15)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.53938313936621297172285881159282265471330118114236e-12), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 16)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.72126969520581558576219170420297785383800313247716e-13), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 17)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.81057004571925961169117504945825287978291651680376e-14), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 18)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.50396638201843310535816082381660912483279969324341e-15), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 19)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.50330305081854272655464104542050109297998338121296e-15), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 20)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.65748943908780839905864199570025333227786061957216e-16), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 21)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.69782199924985143903444545973473854205301886566784e-17), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 22)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.30465447555059221873312238790689561215989143051468e-18), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 23)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.46806937341565301230797169380667244412810953871153e-18), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 24)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.59520452276585659227559942516968833301265805493281e-19), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.58771679139660099100514647692148711295768774585921e-20), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 26)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.11001413331373018059574227843565648889473010676660e-21), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 27)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.43366149729083958029796238740037446793377590904146e-21), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 28)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.53437941664766678806680232945039585344994407777604e-22), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 29)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.48019217902582750507529988918628139079404963189635e-23), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 30)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.91993567701892984811136675864172386794777828712695e-24), tolerance); + 
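+    // The ldexp(1, k) arguments probe the far power-law tail: for an
+    // alpha-stable law with alpha = 3/2, pdf(x) ~ C * x^(-5/2) as x -> inf,
+    // so each doubling of x should scale the density by roughly
+    // 2^(-5/2) ~= 1/5.657 (compare, e.g., the 2^11 and 2^12 entries above).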
BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 31)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.40006005594525679129869441468227938628239752074460e-24), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 32)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.47497989906821913751146663525981445654490373951224e-25), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 34)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.73431218458810807033921781897016124929992555367139e-27), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 36)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.41697255768378077454412606000376285104775580444420e-28), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 38)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.55303924276181374957661287953452523234530316565188e-30), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 40)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.36032476336306675100543445227730858361169873925804e-31), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 42)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.37601488550958357902586661939110889053699438042016e-33), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 44)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.30500465172174486774768816059192932509423715417166e-34), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 46)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.20313953663045271143891020576861031160487434865093e-36), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 48)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.25098110519701647231401040429952006498171313622520e-37), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 50)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.03431595374067647597712273413788666502302011992057e-39), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 52)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.19822373554396139874268836303925702273981755123190e-40), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 54)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.86944917357487937107083766130282172728072819621788e-42), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 56)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.14670286674214980345963428973545770896346187182759e-43), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 58)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.70844645856921813581135618689921390269372237700480e-45), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 60)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.09638951830288066744104877057303202280205793138993e-46), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 62)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.55121724469650208575327739326222025805731763269349e-48), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 64)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.04725538896765690179789918481715848637732139291593e-49), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 68)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.99927284078872744316201092259372825667966088624440e-52), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 72)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.95241488358274164371290129159422806319923560612665e-55), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 76)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.90665515974877113643838016757246988460215123083510e-58), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 80)), 
BOOST_MATH_BIG_CONSTANT(RealType, N, 1.86196792944215931292810563239498984002947733280447e-61), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 84)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.81832805609585870403135315663573226135611325562940e-64), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 88)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.77571099228111201565561831702708228641500206234884e-67), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 92)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.73409276589952345278868976272176004532614982149466e-70), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 96)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.69344996669875337186395484640796879426380291665101e-73), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 100)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.65375973310425133971089340469528202564824480281467e-76), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 104)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.61499973935962044893641934052273635317211406169375e-79), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 108)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.57714818296837934466447201222923471989464263831856e-82), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 112)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.54018377243005795377389844944261203114711195148214e-85), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 116)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.50408571526372847048232270453380081166710151511926e-88), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 120)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.46883370631223483445539326614628985514365382335866e-91), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 124)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.43440791632054183052284498647098618666372443687369e-94), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 128)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.40078898078177913136996580710057244791379339538446e-97), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 128)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.40078898078177913136996580710057244791379339538446e-97), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 136)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.33589647367647088181492405614907498160724010027357e-103), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 144)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.27401015632292831593983083357722757492755899455411e-109), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 152)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21499076492588836282713969571802861683612727599536e-115), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 160)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.15870548718060337336267442294886457141506888961350e-121), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 168)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.10502766340313279472606127066503960744387520753240e-127), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 176)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.05383650150597838852506758753303490394961853745690e-133), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 184)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.00501680517766798832422980073264589686357358689966e-139), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 192)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
9.58458714654605854343633461697240731109212481403024e-146), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 200)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.14057459501844267219193898865929347142422181513809e-152), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 208)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.71713122846454875201410197130135867254659825815019e-158), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 216)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.31330416532950282288942525034080378775272203269023e-164), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 224)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.92818466694784433640425229105072382712623790043853e-170), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 232)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.56090609259399827614236096482345946037887373012402e-176), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 240)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.21064194926643207182155701143594690359008191120531e-182), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 248)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.87660403181689459974437428611368837698944274063617e-188), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 256)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.55804064923943958258092335330361211489624284804933e-194), tolerance);
+}
+
+template <typename RealType, int N>
+void do_test_holtsmark_cdf() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy [%]: " << tolerance << std::endl;
+
+    holtsmark_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.05754562114095147060025732340404110273918791128410e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.43368449353064312968788810247763526151579823896375e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.89152652981222928500336992235218785823347685619486e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.45576815036790322986175114501424442292398775002753e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.15978035591850473786135203626730317231876547935253e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.04840867577215219812315602202859291564396444891271e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.18107352643844092577238041212187564083206726822056e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.63452659158632535349648745626466375370661702427253e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.05039829654829170780787685299557006719608839147146e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.16290125976286647650587923507379034571826555851855e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.29006094814340314434551699445775276932192677014796e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist,
static_cast(-1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.43337488600334723245402521343673257218591019586678e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.59428804524998597404889957316726146572475641692056e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.77411943695232738645173996334900914289694617802622e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.97397674713122675501528813572074060920189377220435e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.19466281873758062089629668872911218009781881016957e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.43657975600729535499895880792984203156689462603233e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.56549331000915958092357114949075709547374035120716e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.69963821328456787766164921360924665804748003656962e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.83891389704828738280543932012082190429368746897774e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.98318069008956501931929973721256792184639224594009e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.13225883262651243874304479279543149837428487068880e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.28592804901375835801811223649103787689862120396348e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.44392772657854742558827130471487286937515502098876e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.60595773518728397925852903878144782668636743128847e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.77167990746831553875195022302050315427406387091454e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.94072018335986771952143399033763464047462959349960e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.11267140533666880513239944176560287064005137544504e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.28709673289702231328042170648699212093305785288837e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.46353362731996489993314637378816744341919505345430e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.64149834100915287958166424201293669944118353975980e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.82049083059559169016945833205795772857456974521089e-1), tolerance); + BOOST_CHECK_EQUAL(cdf(dist, static_cast(0)), static_cast(0.5)); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.17950916940440830983054166794204227142543025478911e-1), tolerance); + 
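+    // The Holtsmark distribution is symmetric about zero, so these values
+    // satisfy cdf(-x) + cdf(x) = 1; e.g. the entries for -0.0625 and 0.0625
+    // above sum to one to the full precision quoted.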
BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.35850165899084712041833575798706330055881646024020e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.53646637268003510006685362621183255658080494654570e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.71290326710297768671957829351300787906694214711163e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.88732859466333119486760055823439712935994862455496e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.05927981664013228047856600966236535952537040650040e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.22832009253168446124804977697949684572593612908546e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.39404226481271602074147096121855217331363256871153e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.55607227342145257441172869528512713062484497901124e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.71407195098624164198188776350896212310137879603652e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.86774116737348756125695520720456850162571512931120e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.01681930991043498068070026278743207815360775405991e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.16108610295171261719456067987917809570631253102226e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.30036178671543212233835078639075334195251996343038e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.43450668999084041907642885050924290452625964879284e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.56342024399270464500104119207015796843310537396767e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.80533718126241937910370331127088781990218118983044e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.02602325286877324498471186427925939079810622779565e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.22588056304767261354826003665099085710305382197378e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.40571195475001402595110042683273853427524358307944e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.56662511399665276754597478656326742781408980413322e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.70993905185659685565448300554224723067807322985204e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.83709874023713352349412076492620965428173444148145e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
8.94960170345170829219212314700442993280391160852854e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.13654734084136746465035125437353362462933829757275e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.28189264735615590742276195878781243591679327317794e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.39515913242278478018768439779714070843560355510873e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.48402196440814952621386479637326968276812345206475e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.55442318496320967701382488549857555770760122499725e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.61084734701877707149966300776478121417665231438051e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.65663155064693568703121118975223647384842017610363e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.69424543788590485293997426765959588972608120887159e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.75181239051510561995724918481437181225561682663730e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.79330912859883809209439632732487322514831404016950e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.82433333964997258238374422700352745232108521574338e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.84823776850456273960749947374965581355459282720918e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.86712078422837181984231917376692653955733800370589e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.88234978933381804514525314568892574021644736001062e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(7.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.89484817977790488434819448808352507951600536883956e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(8)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.90525915297517643347206332876433307912297417265919e-1), tolerance);
+}
+
+template <typename RealType, int N>
+void do_test_holtsmark_ccdf() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy [%]: " << tolerance << std::endl;
+
+    holtsmark_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-2))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.94960170345170829219212314700442993280391160852854e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.83709874023713352349412076492620965428173444148145e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.70993905185659685565448300554224723067807322985204e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.625))), BOOST_MATH_BIG_CONSTANT(RealType, N,
8.56662511399665276754597478656326742781408980413322e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-1.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.40571195475001402595110042683273853427524358307944e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-1.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.22588056304767261354826003665099085710305382197378e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-1.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.02602325286877324498471186427925939079810622779565e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-1.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.80533718126241937910370331127088781990218118983044e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-1))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.56342024399270464500104119207015796843310537396767e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.9375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.43450668999084041907642885050924290452625964879284e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.30036178671543212233835078639075334195251996343038e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.8125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.16108610295171261719456067987917809570631253102226e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.01681930991043498068070026278743207815360775405991e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.6875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.86774116737348756125695520720456850162571512931120e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.71407195098624164198188776350896212310137879603652e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.5625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.55607227342145257441172869528512713062484497901124e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.39404226481271602074147096121855217331363256871153e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.4375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.22832009253168446124804977697949684572593612908546e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.05927981664013228047856600966236535952537040650040e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.3125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.88732859466333119486760055823439712935994862455496e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.71290326710297768671957829351300787906694214711163e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.1875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.53646637268003510006685362621183255658080494654570e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.35850165899084712041833575798706330055881646024020e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.0625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.17950916940440830983054166794204227142543025478911e-1), tolerance); + BOOST_CHECK_EQUAL(cdf(complement(dist, static_cast(0))), 
static_cast(0.5)); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.0625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.82049083059559169016945833205795772857456974521089e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.64149834100915287958166424201293669944118353975980e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.1875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.46353362731996489993314637378816744341919505345430e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.28709673289702231328042170648699212093305785288837e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.3125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.11267140533666880513239944176560287064005137544504e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.94072018335986771952143399033763464047462959349960e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.4375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.77167990746831553875195022302050315427406387091454e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.60595773518728397925852903878144782668636743128847e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.5625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.44392772657854742558827130471487286937515502098876e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.28592804901375835801811223649103787689862120396348e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.6875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.13225883262651243874304479279543149837428487068880e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.98318069008956501931929973721256792184639224594009e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.8125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.83891389704828738280543932012082190429368746897774e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.69963821328456787766164921360924665804748003656962e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.9375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.56549331000915958092357114949075709547374035120716e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(1))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.43657975600729535499895880792984203156689462603233e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(1.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.19466281873758062089629668872911218009781881016957e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(1.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.97397674713122675501528813572074060920189377220435e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(1.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.77411943695232738645173996334900914289694617802622e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(1.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.59428804524998597404889957316726146572475641692056e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(1.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 
1.43337488600334723245402521343673257218591019586678e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(1.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.29006094814340314434551699445775276932192677014796e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(1.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.16290125976286647650587923507379034571826555851855e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(2))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.05039829654829170780787685299557006719608839147146e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(2.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.63452659158632535349648745626466375370661702427253e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(2.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.18107352643844092577238041212187564083206726822056e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(2.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.04840867577215219812315602202859291564396444891271e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(3))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.15978035591850473786135203626730317231876547935253e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(3.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.45576815036790322986175114501424442292398775002753e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(3.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.89152652981222928500336992235218785823347685619486e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(3.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.43368449353064312968788810247763526151579823896375e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(4))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.05754562114095147060025732340404110273918791128410e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(4.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.48187609484894380042750815185628187744383173362701e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.06690871401161907905603672675126774851685959830498e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(5.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.75666660350027417616255772996472547678914784256620e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(6))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.51762231495437260392500526250344186445407172790817e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(6.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.32879215771628180157680826233073460442661996294112e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(7))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.17650210666181954854746854311074259783552639989383e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(7.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.05151820222095115651805511916474920483994631160437e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(8))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.47408470248235665279366712356669208770258273408139e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(9))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.84633233633455623701916449563937321750458546930453e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(10))), BOOST_MATH_BIG_CONSTANT(RealType, N, 
6.63980919776847052871529866313101745651230650390958e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(11))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.71560271091813267790502831701953317735310509411390e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.98877952779840995427042841099634486251917561421235e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(13))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.40474538743684446341559852404247422921805516802754e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.92694975034713587589009244180464471384939103569360e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(15))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.53008216069181937774819568728358865566934398876406e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.19610991747326725339429696634365931201323237447865e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(18))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.66754697245984312281273324441306444625571550964874e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.27055303995134836245432119316955904009319977641407e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(22))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.96335385591609284864855802297989450933958012331026e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(24))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.71983068404058084817697571696065106175519154567134e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(26))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.52289416073896095009461377823763903270627907120760e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(28))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.36093802515022944719982175339249822123873546752029e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(30))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.22583027731897389283291308558313649709444449306352e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(32))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.11172037056341396583040940446061500559620114648017e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(36))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.30343276748924876383449972567328892540120922780153e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(40))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.93479451609843506274951848311637162210971363421001e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(44))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.87195324030611990483144403072151789810819131065125e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(48))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.02706331251660616582062908165140543120249377843861e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(52))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.34227281260431186934023112980425214646324423530835e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(56))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.77808682390572890160969127059116197868923042197157e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(60))), BOOST_MATH_BIG_CONSTANT(RealType, N, 
4.30671561098656496307105837586195479010992455366704e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(64))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.90809577465797739887717333706777298095814323854594e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(72))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.27353325758395877256315285908308787899438871193344e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(80))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.79392283157821028307612118603031347679870090601645e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(88))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.42100764912263851750335808792156725434230560510519e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(96))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.12427366298380215713674931256380175628765437052256e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(104))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.88357904993849478063900853297202135211787374336745e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(112))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.68514784935653217641685603828708555083243580835252e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(120))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.51927236784831507287987712585213807190192778503378e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(128))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.37893523202524132194116984313653211252949573780743e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(256))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.87179879353850251536193247663317269917137569329013e-5), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(512))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.72200715673082274403568012456413425331659480927047e-5), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(1024))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.08767256634253255597053159005214917567906016620942e-6), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 11))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.15224951834965340756402503731619739933629215689572e-6), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.60926645137475638433041259135060814007792759468100e-7), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 13))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.69027136710097153757239920401079588116557633592788e-7), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.51153240121827639330112195094153363441740650435701e-8), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 15))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.36283287602527689849064009106938913094716520594050e-8), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.18894075854698171408179022464997418218329601220535e-8), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 17))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.20354010552543796231728241342645692784842686349965e-9), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 18))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.48617582449547816069323420820736862370145659607645e-9), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, 
ldexp(static_cast(1), 19))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.25442497729649541267939242007563203416972765165476e-10), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.85771976129306035185873711236944017216967773980517e-10), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 21))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.56803119746276025902637885330726865801883592203156e-11), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 22))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.32214969859659307190062902673645385449168267230206e-11), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 23))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.21003899305378486738410997502574389330326609057126e-12), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 24))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.90268712277390815798081242324390081166198190434103e-12), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.02625487407274396069298698869802596448489279633645e-12), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 26))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.62835890339366126281650716118851052269284048205938e-13), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 27))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28281859258171445903392888079435183340041265714998e-13), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 28))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.53544862923055721373031196622890601888700626111182e-14), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 29))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.60352324072570315319362346398633880353985161880622e-14), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 30))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.66931078653639661641440656550426788294070266782671e-15), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 31))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.00440405090690395389846895694672344247801107283103e-15), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 32))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.08663848317021453602605776312368607691806298813143e-16), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 34))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.85829810396272422714320494875293186097734524880995e-17), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 36))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.10728726299533984178525425523566315206520781873938e-17), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 38))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38410907874417469494912307476988489629927133322828e-18), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 40))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.73013634843021835192352185216943643167993641440545e-19), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 42))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.16267043553777293728520200407227686276581114275508e-20), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 44))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.70333804442221617119725245647479628568092558840977e-21), tolerance); + 
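+    // The Holtsmark law is the symmetric alpha-stable distribution with alpha = 3/2,
+    // so the complementary CDF decays asymptotically like x^(-3/2): each quadrupling
+    // of x divides the tail probability by roughly 8, which the spot values in this
+    // ldexp-based sweep of the extreme upper tail follow.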
BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 46))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.37917255552777021393262025049731570198920941773050e-22), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 48))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.22396569440971276740578385685661655637545268535833e-23), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 50))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.27995711801214095925566865602936005935821644314758e-24), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 52))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.59994639751517619906934188799897966230541134026964e-25), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 54))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.24993299689397024883663924561783076352358304956329e-26), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 56))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.03124162461174628110457931016502737959110130110786e-26), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 58))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28905203076468285138072404465359649544991622324619e-27), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 60))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.61131503845585356422590504127751316165005771606733e-28), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 62))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.01414379806981695528238129932509731805283190086691e-29), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 64))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.51767974758727119410297662380140381412701796292469e-30), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 68))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.93387460560511124078590097461169759773383672872924e-32), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 72))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.14667907125798631372797027282887330061342582520339e-34), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 76))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.60418604884060361519995355129506804305208883665969e-36), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 80))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.50065407013134431487499274238985426822797191535889e-37), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 84))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.34477198458022549199217615998414729133523646527342e-39), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 88))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.66370622590660233123777524997523014264365635070860e-41), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 92))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.72454097797906614255902382808629709787906142136400e-43), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 96))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.94459527809229084774847473138483921543599314796577e-45), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 100))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.39759301220192044496069917677888112741187383092503e-46), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 104))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.18373908156550069525109246371700176158105285841693e-48), 
tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 108))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.41209231494609483632983197455781525247039509121778e-50), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 112))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.33139424210327318176536246024658633198499233002634e-52), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 116))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.33030350328636434650837884413529114372655051566613e-54), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 120))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.30160992238849442914193419439613924120727351807283e-55), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 124))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.03376550373202254553427217874396756438636487198880e-57), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 128))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.17775859958128522739730027928744931935369511248250e-59), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 136))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.75819970600899713720044013497912431482835720820923e-63), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 144))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.89409172509985281669932620482888777217489189653546e-66), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 152))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.62424737573206254076983936725802678753635716927602e-70), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 160))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.12896664446583558124263656427197919617586844953028e-73), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 168))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.75626622184041889951815567449213670941374133186103e-77), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 176))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.72916558066508520390174725217806813821714192348884e-81), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 184))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.64286269059206181735882501273878616655691941491427e-84), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 192))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.01089524070327592128619387875680216444560404031804e-88), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 200))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.79222470874823222970262177430859903429102548905771e-92), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 208))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.39067986053423638420474164411831031110620739478948e-95), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 216))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.83662075325741304737485752958571853297413914743525e-99), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 224))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.42495623858823560726925232655901331371438944029181e-102), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 232))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.47889706686580958805969806288821609793552109446243e-106), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 240))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.49340104215285543959887222384818383285039329702741e-110), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 248))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.07358423880684947255831841402543550606699055103208e-113), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 256))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.06246152052453484511308206549178590348386364998067e-117), tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 600))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.35984768491636685338723260976696756010081951066709e-272), tolerance);
+
+    // The test stops here in fp64, because x is infinite beyond this point.
+    if (N <= 53) {
+        return;
+    }
+
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 10000))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.07856314267954244808378917796336533616174400834444e-4517), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_holtsmark_quantile_nearzero() {
+    //
+    // Basic sanity checks, tolerance is 4 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 4;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    holtsmark_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.03125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.95154561868642736172727541497862573947423988108440e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.70076903638473389271107254016757513871156973570753e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.09375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.14334937403136202998120100318789158621074793095204e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.78777320599109327486003359289950832456660816353247e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.15625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.52359744792606448551215814625237535910408488746319e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.31030403921397562363480619469818801698833367088973e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.21875)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.12887694529692805365864437467349833307816315605127e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, -9.68933181713583005208786313049614919507807226788256e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.28125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.24180933056718074475811972779037648260058221511975e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.90498457639035315478919560434827057700411821656397e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.34375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.65011034985136468564856594364133703131867596374603e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.45602574935161564367593993713685007358536186330925e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.40625)), BOOST_MATH_BIG_CONSTANT(RealType, N,
-3.30636223130194458226267853223267146033119688836326e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.18783126811804488668597304309230757418655445388316e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.46875)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.08910089454817618171205461377219663239656206354005e-1), tolerance); + BOOST_CHECK_EQUAL(quantile(dist, static_cast(0.5)), static_cast(0)); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.50390625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.35942281074760935023421465350943213907657016740203e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5078125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.71903117534558589826852519194663375075753566669865e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.51171875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.07901080568320221484687881885423066805647774437405e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.515625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.43954773006081419776089446583406196027634755818438e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.51953125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.80082845222988104915780076816665811376325257794804e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5234375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.16304011143799846843111212111260495740955724460844e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.52734375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.52637064382003902226333974540096053392566128735022e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.53125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.08910089454817618171205461377219663239656206354005e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.53515625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.22571450376289000001278843371459783620761964026442e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5390625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.36249702341028008589045826702986410397012912904709e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.54296875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.49946773116932301309732751744953722231029126346486e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.546875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.63664606836100199300226165836560082654887238338208e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.55078125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.77405165765079089783419383641965337744707947259452e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5546875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.91170432114732759551367174850419509782585472590891e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.55859375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.04962409893975946705863512977926741730110899605668e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.18783126811804488668597304309230757418655445388316e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.56640625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.32634636232249536218129854873203382200612639970098e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5703125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.46519019187105731139818680492599864661432850376167e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.57421875)), 
BOOST_MATH_BIG_CONSTANT(RealType, N, 2.60438386451527503677057942069112096470660708202704e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.578125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.74394880687856404334281468719091417527159191461011e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.58203125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.88390678663337502806483517706706303095739191330472e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5859375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.02427993547706435128366410334899635275725704584021e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.58984375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.16509077296982960387421259319514641259915543447926e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.59375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.30636223130194458226267853223267146033119688836326e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.59765625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.44811768106176498805637646337817923388445968601566e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6015625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.59038095808060603000739360761949422570819209911788e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.60546875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.73317639143565086527888233910690716885943557654200e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.609375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.87652883269757340156275635385373789172745768932299e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.61328125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.02046368651559365036792703758158149784555601527522e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6171875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.16500694263927667044145946879116606537710528519644e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.62109375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.31018520948359067342384024222999190532594772267877e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.45602574935161564367593993713685007358536186330925e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.62890625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.60255651543790720669175415204007861250418903243190e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6328125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.74980619074494545171846930824082767773201407564280e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.63671875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.89780422905541754178666731200120149076546958331875e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.640625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.04658089811438846102378009478135229217539543328542e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.64453125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.19616732518780928821098759267869073306168616762584e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6484375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.34659554517741239172052324381351337830662490680744e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.65234375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.49789855148697728320168560667825948269915149112198e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.65625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
5.65011034985136468564856594364133703131867596374603e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.66015625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.80326601535777269419392897717286438879602729753825e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6640625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.95740175290855108336930680485963056005472680796594e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.66796875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.11255496139682430686772264889247897534154814090188e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.671875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.26876430189035334589546274389116075758343949758676e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.67578125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.42606977014577414303134971466049624688623181490963e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6796875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.58451277380488310882288163359470075488686595989134e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.68359375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.74413621465733462219129425896964561569492923310844e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.90498457639035315478919560434827057700411821656397e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.69140625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.06710401828627699619029762829527775785025847826609e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6953125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.23054247537343446268743874344186034038860806575417e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.69921875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.39534976558556849766533672421653104101284193327489e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.703125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.56157770454041251979362066603791706893576777672076e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.70703125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.72928022860981170501224651695596294779402544635284e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7109375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.89851352702281826568041708053406482267060399077072e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.71484375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.06933618382042868991732981731903327683778049211437e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.71875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.24180933056718074475811972779037648260058221511975e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.72265625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.41599681082196095188647911444502703373377770261645e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7265625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.59196535747955714353182241432999465132648793939716e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.73046875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.76978478421742214185140384463517803619244579498785e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.734375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.94952819242075830879661206434669971092120406861116e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.73828125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
9.13127219511566873406119301005062170096048576202346e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7421875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.31509715961740080067603245114697688184732500932588e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.74609375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.50108747080172235764142816696014524944397797022118e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.68933181713583005208786313049614919507807226788256e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.75390625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.87992350186512083533727177758662092012850600324604e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7578125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.00729607820486228792863859054788768644744365249151e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.76171875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.02685472384747190194216544893563951741322238866541e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.765625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.04667921798768908218602656800798525125137666299761e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.76953125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.06678110853147091220341113291781102477507787382916e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7734375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.08717260890978562986849760571609969494354300822263e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.77734375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.10786665132220733188524032283620543957825067010411e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.78125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.12887694529692805365864437467349833307816315605127e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.78515625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.15021804221161134460394215947401773984635551400974e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7890625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.17190540651154074066537741066806130017312231661434e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.79296875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.19395549446874295273954813640902172894636549828066e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.796875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21638584145104788446634880619086527181141939030279e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.80078125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.23921515881685008719278461299232510042094064507963e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8046875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.26246344172387198181843263481470665031951188807903e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.80859375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28615208934364044613153187689263023500565577152964e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.31030403921397562363480619469818801698833367088973e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.81640625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.33494391774730388327762461486718746182135887404098e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8203125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.36009820925261741664314298719135880075160309673291e0), 
tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.82421875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38579544623528308147561770378298998291069435447743e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.828125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41206642422644329402039008236727065491131379281346e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.83203125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.43894444498096320102061428365633543440558564852878e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8359375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.46646559259303986239248263261753112540493595538180e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.83984375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.49466904794112146248736006028509653369456653722975e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.84375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.52359744792606448551215814625237535910408488746319e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.84765625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55329729725622722446696215102952397443200898053734e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8515625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.58381944212178952767489242020964875845531372433107e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.85546875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.61521961706725528436767664289542667668084392814972e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.859375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.64755907881888895242774154629775987069182281943649e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.86328125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.68090534388763421593277486421165006597881155937148e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8671875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.71533305062559095290467926282068904086311880655794e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.87109375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.75092497130127405520149304919663885139873951807268e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.78777320599109327486003359289950832456660816353247e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.87890625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.82598059808698758538443295349421685195378444570311e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8828125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.86566242157090907181680189290396765394233286379846e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.88671875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.90694840369852884963353039612138690706293500611801e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.890625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.94998516446439884497123835171451955479771823102329e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.89453125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.99493917772297054805704471816059443529057908424094e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8984375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.04200039028200656353168185848438653746136511354273e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.90234375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.09138667776446768039690693058064390661207192288310e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, 
static_cast(0.90625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.14334937403136202998120100318789158621074793095204e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.91015625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.19818019103302928413100774795456740418726081571239e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9140625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.25621995785515543565780905111478616154698242356459e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.91796875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.31786976613802108820384089261971867032207069978249e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.921875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.38360533647314959687029290315016585988822268727435e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.92578125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.45399575188877505107943151191453137828469465461499e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9296875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.52972819582716132188558325053476431645151134471514e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.93359375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.61164107343000684767083444817632028828927968917161e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.70076903638473389271107254016757513871156973570753e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.94140625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.79840522818853957979403024307775647164832128717134e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9453125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.90618896524175502682103322803168945370341377670787e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.94921875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.02623187745561328365781914812219941666139382669240e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.953125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.16130376436882129187002225210296787778679380216571e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.95703125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.31511403805596290718635028471440503099806209998330e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9609375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.49275168669289561003117954123304779848451307206373e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.96484375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.70139933971958385705119882168933347231988022896107e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.96875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.95154561868642736172727541497862573947423988108440e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.97265625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.25916037511354144395537844168672288266598351274466e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9765625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.64987772024289138285743520182808805983174934278635e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.98046875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.16779337095862086311250586064141028622224308218889e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.984375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.89632696959562976212621758577393939287586784169403e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.98828125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
7.01689760146742972661016754806402368873114241071209e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.9921875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.02446937207505244639190533898754565373763855739835e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.99609375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.40479269149807902190125365288201718857308359786880e1), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_holtsmark_quantile_lower() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    holtsmark_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -3)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.78777320599109327486003359289950832456660816353247e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -4)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.70076903638473389271107254016757513871156973570753e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -5)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.95154561868642736172727541497862573947423988108440e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -6)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.89632696959562976212621758577393939287586784169403e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -7)), BOOST_MATH_BIG_CONSTANT(RealType, N, -9.02446937207505244639190533898754565373763855739835e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -8)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.40479269149807902190125365288201718857308359786880e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -10)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.48634712845212854436829850825868101882854019691874e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -12)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.75100888665534247471443047755794084714498529274899e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -14)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.20296704598194178128278789813687898595549252840676e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -16)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.54977454632239578277064855837334284482347724412824e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -20)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.52361836255645260708087122768062244544827526253646e3), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -24)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.23734752970824778029419146983721335265273533376700e4), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -28)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.42062670553125367339285994800008811139726501576815e5), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -32)), BOOST_MATH_BIG_CONSTANT(RealType, N, -9.02041713923935805934169232925253585383521252378005e5), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -40)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.63680429392084153704899843149763443252941653727485e7), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -48)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.46626761095577290791010108814328116732395911944180e9), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -56)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.91162056903107188819131273099660458893079363542047e10), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -64)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.38341606205240162091324729036756554251581053237655e12), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -80)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.87423965612020298589373786039770640757408383939246e15), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -96)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.29757143623897661671305188560924841842505858740001e18), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -112)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.02366940392761715971201478542352832996294475084257e22), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -128)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.66397326199025580692950800332626492430331068544172e25), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -160)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.39662661178492965217151228166250717006976010266469e31), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -192)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.16169688570203891305395889425358741905195655516409e38), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -224)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.06948889094299024072567114732250581789838857797378e44), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -256)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.11034458952573565292573972403491751744218292992004e50), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -320)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.66220760083199094068657639449976729910923748932480e63), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -384)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.95304965911372717286692649124348058110571556299024e76), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -448)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.75980725346824443030445016705237404631255977139835e89), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -512)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.92674940440883821350824836233864795634501852860731e102), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -640)), BOOST_MATH_BIG_CONSTANT(RealType, N, -9.39113744455748572074441450789762236967081046313794e127), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -768)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.57731878887639063343639480262468437174058078877170e153), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -896)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.23102338973254024994084948896750440849135610207358e179), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_holtsmark_quantile_upper() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    holtsmark_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -3))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.78777320599109327486003359289950832456660816353247e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -4))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.70076903638473389271107254016757513871156973570753e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.95154561868642736172727541497862573947423988108440e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -6))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.89632696959562976212621758577393939287586784169403e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -7))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.02446937207505244639190533898754565373763855739835e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -8))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.40479269149807902190125365288201718857308359786880e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -10))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.48634712845212854436829850825868101882854019691874e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.75100888665534247471443047755794084714498529274899e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.20296704598194178128278789813687898595549252840676e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.54977454632239578277064855837334284482347724412824e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.52361836255645260708087122768062244544827526253646e3), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_holtsmark_locscale_param() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    holtsmark_distribution<RealType> dist_0_1(static_cast<RealType>(0), static_cast<RealType>(1));
+    holtsmark_distribution<RealType> dist_1_3(static_cast<RealType>(1), static_cast<RealType>(3));
+
+    BOOST_CHECK_CLOSE(entropy(dist_0_1), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.0694485051346244003155800384542166381), tolerance);
+    BOOST_CHECK_CLOSE(entropy(dist_1_3), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.0694485051346244003155800384542166381) + log(static_cast<RealType>(3)), tolerance);
+
+    BOOST_CHECK_EQUAL(median(dist_0_1), static_cast<RealType>(0));
+    BOOST_CHECK_EQUAL(median(dist_1_3), static_cast<RealType>(1));
+
+    BOOST_CHECK_EQUAL(mode(dist_0_1), static_cast<RealType>(0));
+    BOOST_CHECK_EQUAL(mode(dist_1_3), static_cast<RealType>(1));
+
+    BOOST_CHECK_EQUAL(mean(dist_0_1), static_cast<RealType>(0));
+    BOOST_CHECK_EQUAL(mean(dist_1_3), static_cast<RealType>(1));
+
+    BOOST_CHECK((boost::math::isinf)(variance(dist_0_1)));
+
+    BOOST_CHECK_CLOSE(pdf(dist_0_1, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.02038159607840130388931544845552929991729709746772e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist_1_3, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.87352751452164445024482162286994868261727837966217e-1) / 3, tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, static_cast<RealType>(2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.94960170345170829219212314700442993280391160852854e-1), tolerance);
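+    // dist_1_3 must reproduce the same probability at x = mu + c * 2 = 1 + 3 * 2 = 7,
+    // since the CDF depends only on the standardised argument (x - mu) / c.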
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, static_cast<RealType>(7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.94960170345170829219212314700442993280391160852854e-1), tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, quantile(dist_0_1, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.25), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, quantile(dist_1_3, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.25), tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, quantile(dist_0_1, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.75), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, quantile(dist_1_3, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.75), tolerance);
+}
+
+BOOST_AUTO_TEST_CASE(holtsmark_pdf_fp64)
+{
+    do_test_holtsmark_pdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(holtsmark_pdf_std64)
+{
+    do_test_holtsmark_pdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(holtsmark_pdf_fp128)
+{
+    do_test_holtsmark_pdf<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(holtsmark_cdf_fp64)
+{
+    do_test_holtsmark_cdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(holtsmark_cdf_std64)
+{
+    do_test_holtsmark_cdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(holtsmark_cdf_fp128)
+{
+    do_test_holtsmark_cdf<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(holtsmark_ccdf_fp64)
+{
+    do_test_holtsmark_ccdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(holtsmark_ccdf_std64)
+{
+    do_test_holtsmark_ccdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(holtsmark_ccdf_fp128)
+{
+    do_test_holtsmark_ccdf<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(holtsmark_quantile_nearzero_fp64)
+{
+    do_test_holtsmark_quantile_nearzero<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(holtsmark_quantile_nearzero_std64)
+{
+    do_test_holtsmark_quantile_nearzero<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(holtsmark_quantile_nearzero_fp128)
+{
+    do_test_holtsmark_quantile_nearzero<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(holtsmark_quantile_lower_fp64)
+{
+    do_test_holtsmark_quantile_lower<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(holtsmark_quantile_lower_std64)
+{
+    do_test_holtsmark_quantile_lower<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(holtsmark_quantile_lower_fp128)
+{
+    do_test_holtsmark_quantile_lower<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(holtsmark_quantile_upper_fp64)
+{
+    do_test_holtsmark_quantile_upper<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(holtsmark_quantile_upper_std64)
+{
+    do_test_holtsmark_quantile_upper<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(holtsmark_quantile_upper_fp128)
+{
+    do_test_holtsmark_quantile_upper<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(holtsmark_locscale_fp64)
+{
+    do_test_holtsmark_locscale_param<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(holtsmark_locscale_std64)
+{
+    do_test_holtsmark_locscale_param<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(holtsmark_locscale_fp128)
+{
+    do_test_holtsmark_locscale_param<cpp_bin_float_quad, 113>();
+}
+#endif
diff --git a/test/test_holtsmark_cdf_double.cu b/test/test_holtsmark_cdf_double.cu
new file mode 100644
index 0000000000..6b1d57041c
--- /dev/null
+++ b/test/test_holtsmark_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::holtsmark_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
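+    // Integer ceiling division: round up so a partial final block covers the tail;
+    // the kernel's (i < numElements) bounds check discards the surplus threads.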
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::holtsmark_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_holtsmark_cdf_float.cu b/test/test_holtsmark_cdf_float.cu
new file mode 100644
index 0000000000..2a3533bac9
--- /dev/null
+++ b/test/test_holtsmark_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::holtsmark_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::holtsmark_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_holtsmark_cdf_nvrtc_double.cpp b/test/test_holtsmark_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..3b8c9ba946
--- /dev/null
+++ b/test/test_holtsmark_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/holtsmark.hpp>
+extern "C" __global__
+void test_holtsmark_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::holtsmark_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_holtsmark_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_holtsmark_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_holtsmark_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
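+        // The kernel ignores its second argument; h_in2/d_in2 are filled and copied
+        // anyway so the launch code stays identical across these NVRTC tests.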
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::holtsmark_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_holtsmark_cdf_nvrtc_float.cpp b/test/test_holtsmark_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..a3ffec5942
--- /dev/null
+++ b/test/test_holtsmark_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/holtsmark.hpp>
+extern "C" __global__
+void test_holtsmark_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::holtsmark_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_holtsmark_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_holtsmark_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_holtsmark_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::holtsmark_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_holtsmark_pdf_double.cu b/test/test_holtsmark_pdf_double.cu
new file mode 100644
index 0000000000..a53360d200
--- /dev/null
+++ b/test/test_holtsmark_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::holtsmark_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::holtsmark_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_holtsmark_pdf_float.cu b/test/test_holtsmark_pdf_float.cu
new file mode 100644
index 0000000000..57052803fc
--- /dev/null
+++ b/test/test_holtsmark_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::holtsmark_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::holtsmark_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_holtsmark_pdf_nvrtc_double.cpp b/test/test_holtsmark_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..a8355d368c
--- /dev/null
+++ b/test/test_holtsmark_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/holtsmark.hpp>
+extern "C" __global__
+void test_holtsmark_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::holtsmark_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_holtsmark_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_holtsmark_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_holtsmark_kernel"), "Failed to get kernel function");
+
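+        // From here the runtime-compiled PTX behaves like any precompiled kernel:
+        // it is launched through the driver API with an explicit argument array.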
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::holtsmark_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_holtsmark_pdf_nvrtc_float.cpp b/test/test_holtsmark_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..7a02d19832
--- /dev/null
+++ b/test/test_holtsmark_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/holtsmark.hpp>
+extern "C" __global__
+void test_holtsmark_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::holtsmark_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_holtsmark_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_holtsmark_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_holtsmark_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::holtsmark_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_holtsmark_quan_nvrtc_double.cpp b/test/test_holtsmark_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..1c2cc61fa3
--- /dev/null
+++ b/test/test_holtsmark_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/holtsmark.hpp>
+extern "C" __global__
+void test_holtsmark_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::holtsmark_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_holtsmark_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_holtsmark_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_holtsmark_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
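+        // quantile() consumes probabilities, so inputs are drawn uniformly from (0, 1);
+        // the unused second array again only satisfies the shared kernel signature.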
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::holtsmark_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_holtsmark_quan_nvrtc_float.cpp b/test/test_holtsmark_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..a343f232e0
--- /dev/null
+++ b/test/test_holtsmark_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/holtsmark.hpp>
+extern "C" __global__
+void test_holtsmark_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::holtsmark_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_holtsmark_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_holtsmark_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_holtsmark_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::holtsmark_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ibeta.cpp b/test/test_ibeta.cpp
index e026ac6c52..987b361105 100644
--- a/test/test_ibeta.cpp
+++ b/test/test_ibeta.cpp
@@ -3,7 +3,18 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+#  pragma GCC diagnostic push
+#  pragma GCC diagnostic ignored "-Woverflow"
+#endif
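+// The ibeta test data headers contain literals that are out of range for float,
+// hence the blanket warning suppression above.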
+
 #include "test_ibeta.hpp"
 
 #if !defined(TEST_FLOAT) && !defined(TEST_DOUBLE) && !defined(TEST_LDOUBLE) && !defined(TEST_REAL_CONCEPT)
diff --git a/test/test_ibeta.hpp b/test/test_ibeta.hpp
index 7c951d614f..cfd5d78cd1 100644
--- a/test/test_ibeta.hpp
+++ b/test/test_ibeta.hpp
@@ -8,9 +8,10 @@
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
 #include <boost/test/tools/floating_point_comparison.hpp>
+#include <boost/math/tools/config.hpp>
 #include <boost/math/special_functions/beta.hpp>
 #include <boost/math/constants/constants.hpp>
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include <boost/math/tools/stats.hpp>
 #include <boost/math/tools/big_constant.hpp>
 #include <boost/array.hpp>
diff --git a/test/test_ibeta_derivative.cpp b/test/test_ibeta_derivative.cpp
index c899c94bf5..5d6a312754 100644
--- a/test/test_ibeta_derivative.cpp
+++ b/test/test_ibeta_derivative.cpp
@@ -4,7 +4,7 @@
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 #if defined(__GNUC__) && __GNUC__ <= 12
 #pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wliteral-range"
+#pragma GCC diagnostic ignored "-Woverflow"
 #endif
 #include <pch_light.hpp>
 #include "test_ibeta_derivative.hpp"
diff --git a/test/test_ibeta_derivative_double.cu b/test/test_ibeta_derivative_double.cu
new file mode 100644
index 0000000000..e5f7f340ba
--- /dev/null
+++ b/test/test_ibeta_derivative_double.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibeta_derivative(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+    // Consolidate the test data:
+    std::vector<float_type> v1, v2, v3;
+
+    for(unsigned i = 0; i < ibeta_data.size(); ++i)
+    {
+        v1.push_back(ibeta_data[i][0]);
+        v2.push_back(ibeta_data[i][1]);
+        v3.push_back(ibeta_data[i][2]);
+    }
+    for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+    {
+        v1.push_back(ibeta_small_data[i][0]);
+        v2.push_back(ibeta_small_data[i][1]);
+        v3.push_back(ibeta_small_data[i][2]);
+    }
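+    // The .ipp tables only hold a few thousand rows; the initialization loop below
+    // cycles through them (i % v1.size()) to fill all 50000 test inputs.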
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vectors
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+    cuda_managed_ptr<float_type> input_vector3(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        int table_id = i % v1.size();
+        input_vector1[i] = v1[table_id];
+        input_vector2[i] = v2[table_id];
+        input_vector3[i] = v3[table_id];
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::ibeta_derivative(input_vector1[i], input_vector2[i], input_vector3[i]));
+    double t = w.elapsed();
+    bool failed = false;
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::isfinite(output_vector[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                failed = true;
+            }
+        }
+    }
+
+    if (failed)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_ibeta_derivative_float.cu b/test/test_ibeta_derivative_float.cu
new file mode 100644
index 0000000000..36a79665d4
--- /dev/null
+++ b/test/test_ibeta_derivative_float.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
diff --git a/test/test_ibeta_derivative_float.cu b/test/test_ibeta_derivative_float.cu
new file mode 100644
index 0000000000..36a79665d4
--- /dev/null
+++ b/test/test_ibeta_derivative_float.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibeta_derivative(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+    // Consolidate the test data:
+    std::vector<float_type> v1, v2, v3;
+
+    for(unsigned i = 0; i < ibeta_data.size(); ++i)
+    {
+        v1.push_back(ibeta_data[i][0]);
+        v2.push_back(ibeta_data[i][1]);
+        v3.push_back(ibeta_data[i][2]);
+    }
+    for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+    {
+        v1.push_back(ibeta_small_data[i][0]);
+        v2.push_back(ibeta_small_data[i][1]);
+        v3.push_back(ibeta_small_data[i][2]);
+    }
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vectors
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+    cuda_managed_ptr<float_type> input_vector3(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        int table_id = i % v1.size();
+        input_vector1[i] = v1[table_id];
+        input_vector2[i] = v2[table_id];
+        input_vector3[i] = v3[table_id];
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::ibeta_derivative(input_vector1[i], input_vector2[i], input_vector3[i]));
+    double t = w.elapsed();
+    bool failed = false;
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::isfinite(output_vector[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                failed = true;
+            }
+        }
+    }
+
+    if (failed)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
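Likewise, the watch timer is only ever used here through elapsed() and reset(). A minimal std::chrono-based sketch consistent with that usage (again an assumption, not the repository's actual stopwatch.hpp):

// Hypothetical sketch of the `watch` stopwatch -- inferred from its usage
// (w.elapsed(), w.reset()), not the repository's actual stopwatch.hpp.
#include <chrono>

class watch
{
    std::chrono::steady_clock::time_point start_ { std::chrono::steady_clock::now() };
public:
    // Seconds since construction or the last reset().
    double elapsed() const
    {
        return std::chrono::duration<double>(std::chrono::steady_clock::now() - start_).count();
    }
    void reset() { start_ = std::chrono::steady_clock::now(); }
};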
diff --git a/test/test_ibeta_derivative_nvrtc_double.cpp b/test/test_ibeta_derivative_nvrtc_double.cpp
new file mode 100644
index 0000000000..f15d21db00
--- /dev/null
+++ b/test/test_ibeta_derivative_nvrtc_double.cpp
@@ -0,0 +1,207 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/beta.hpp>
+extern "C" __global__
+void test_ibeta_derivative_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibeta_derivative(in1[i], in2[i], in3[i]);
+    }
+}
+)";
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibeta_derivative_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ibeta_derivative_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ibeta_derivative_kernel"), "Failed to get kernel function");
+
+        int numElements = ibeta_data.size() + ibeta_small_data.size();
+        float_type *h_in1, *h_in2, *h_in3, *h_out;
+        float_type *d_in1, *d_in2, *d_in3, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_in3 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        for (int i = 0; i < ibeta_data.size(); ++i)
+        {
+            h_in1[i] = ibeta_data[i][0];
+            h_in2[i] = ibeta_data[i][1];
+            h_in3[i] = ibeta_data[i][2];
+        }
+        for (int i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0];
+            h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1];
+            h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2];
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in3");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+        checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ibeta_derivative(h_in1[i], h_in2[i], h_in3[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_in3);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_in3;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
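One cleanup detail in the NVRTC hosts above and below: the program, context and heap allocations are all released, but the CUmodule loaded with cuModuleLoadDataEx is never passed to cuModuleUnload. An illustrative RAII guard that would close that gap (a sketch, not part of the patch):

// Illustrative RAII guard for a CUmodule -- not part of the patch above.
#include <cuda.h>

struct module_guard
{
    CUmodule mod {nullptr};
    explicit module_guard(CUmodule m) : mod(m) {}
    // Unload the module when the guard leaves scope.
    ~module_guard() { if (mod) cuModuleUnload(mod); }
    module_guard(const module_guard&) = delete;
    module_guard& operator=(const module_guard&) = delete;
};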
diff --git a/test/test_ibeta_derivative_nvrtc_float.cpp b/test/test_ibeta_derivative_nvrtc_float.cpp
new file mode 100644
index 0000000000..17443e0bdc
--- /dev/null
+++ b/test/test_ibeta_derivative_nvrtc_float.cpp
@@ -0,0 +1,207 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/beta.hpp>
+extern "C" __global__
+void test_ibeta_derivative_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibeta_derivative(in1[i], in2[i], in3[i]);
+    }
+}
+)";
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibeta_derivative_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ibeta_derivative_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ibeta_derivative_kernel"), "Failed to get kernel function");
+
+        int numElements = ibeta_data.size() + ibeta_small_data.size();
+        float_type *h_in1, *h_in2, *h_in3, *h_out;
+        float_type *d_in1, *d_in2, *d_in3, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_in3 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        for (int i = 0; i < ibeta_data.size(); ++i)
+        {
+            h_in1[i] = ibeta_data[i][0];
+            h_in2[i] = ibeta_data[i][1];
+            h_in3[i] = ibeta_data[i][2];
+        }
+        for (int i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0];
+            h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1];
+            h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2];
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in3");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+        checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ibeta_derivative(h_in1[i], h_in2[i], h_in3[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_in3);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_in3;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ibeta_double.cu b/test/test_ibeta_double.cu
new file mode 100644
index 0000000000..20384bf25f
--- /dev/null
+++ b/test/test_ibeta_double.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// floating-point value does not fit in required floating-point type +#pragma nv_diag_suppress 221 + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ibeta(in1[i], in2[i], in3[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2, v3; + + for(unsigned i = 0; i < ibeta_data.size(); ++i) + { + v1.push_back(ibeta_data[i][0]); + v2.push_back(ibeta_data[i][1]); + v3.push_back(ibeta_data[i][2]); + } + for(unsigned i = 0; i < ibeta_small_data.size(); ++i) + { + v1.push_back(ibeta_small_data[i][0]); + v2.push_back(ibeta_small_data[i][1]); + v3.push_back(ibeta_small_data[i][2]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr input_vector3(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + input_vector3[i] = v3[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ibeta(input_vector1[i], input_vector2[i], input_vector3[i])); + double t = w.elapsed(); + bool failed = false; + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::isfinite(output_vector[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + failed = true; + } + } + } + + if (failed) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + + diff --git a/test/test_ibeta_float.cu b/test/test_ibeta_float.cu new file mode 100644 index 0000000000..be17813ee4 --- /dev/null +++ b/test/test_ibeta_float.cu @@ -0,0 +1,149 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// floating-point value does not fit in required floating-point type +#pragma nv_diag_suppress 221 + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ibeta(in1[i], in2[i], in3[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2, v3; + + for(unsigned i = 0; i < ibeta_data.size(); ++i) + { + v1.push_back(ibeta_data[i][0]); + v2.push_back(ibeta_data[i][1]); + v3.push_back(ibeta_data[i][2]); + } + for(unsigned i = 0; i < ibeta_small_data.size(); ++i) + { + v1.push_back(ibeta_small_data[i][0]); + v2.push_back(ibeta_small_data[i][1]); + v3.push_back(ibeta_small_data[i][2]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr input_vector3(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = 
v1[table_id]; + input_vector2[i] = v2[table_id]; + input_vector3[i] = v3[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ibeta(input_vector1[i], input_vector2[i], input_vector3[i])); + double t = w.elapsed(); + bool failed = false; + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::isfinite(output_vector[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + failed = true; + } + } + } + + if (failed) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + + diff --git a/test/test_ibeta_inv.cpp b/test/test_ibeta_inv.cpp index 218c1625e8..ab1f4267fc 100644 --- a/test/test_ibeta_inv.cpp +++ b/test/test_ibeta_inv.cpp @@ -3,7 +3,18 @@ // Boost Software License, Version 1.0. 
 (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
+#ifdef __clang__
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Woverflow"
+#endif
+
 #include "test_ibeta_inv.hpp"
 
 #if !defined(TEST_FLOAT) && !defined(TEST_DOUBLE) && !defined(TEST_LDOUBLE) && !defined(TEST_REAL_CONCEPT)
diff --git a/test/test_ibeta_inv.hpp b/test/test_ibeta_inv.hpp
index ba98901773..fa765b2ef8 100644
--- a/test/test_ibeta_inv.hpp
+++ b/test/test_ibeta_inv.hpp
@@ -8,10 +8,11 @@
 #define BOOST_TEST_MAIN
 #include
 #include
+#include
 #include // for has_denorm_now
 #include
 #include
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include
 #include
 #include
@@ -306,6 +307,7 @@ void test_spots(T)
    BOOST_MATH_CHECK_THROW(::boost::math::ibeta_inv(static_cast<T>(2.125), -n, static_cast<T>(0.125)), std::domain_error);
    BOOST_MATH_CHECK_THROW(::boost::math::ibeta_inv(static_cast<T>(2.125), static_cast<T>(1.125), -n), std::domain_error);
    }
+   #ifndef SYCL_LANGUAGE_VERSION
    if (boost::math::detail::has_denorm_now<T>())
    {
       T m = std::numeric_limits<T>::denorm_min();
@@ -317,5 +319,6 @@ void test_spots(T)
       BOOST_CHECK((boost::math::isfinite)(boost::math::ibeta_inv(static_cast<T>(12.125), m, static_cast<T>(0.125))));
       BOOST_CHECK((boost::math::isfinite)(boost::math::ibeta_inv(m, m, static_cast<T>(0.125))));
    }
+   #endif
 }
diff --git a/test/test_ibeta_inv_ab.cpp b/test/test_ibeta_inv_ab.cpp
index c1acb2d1ca..fdf735ef1e 100644
--- a/test/test_ibeta_inv_ab.cpp
+++ b/test/test_ibeta_inv_ab.cpp
@@ -3,7 +3,18 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
+#ifdef __clang__
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Woverflow"
+#endif
+
 #include "test_ibeta_inv_ab.hpp"
 
 #if !defined(TEST_FLOAT) && !defined(TEST_DOUBLE) && !defined(TEST_LDOUBLE) && !defined(TEST_REAL_CONCEPT)
diff --git a/test/test_ibeta_inv_ab.hpp b/test/test_ibeta_inv_ab.hpp
index c378d15287..b91ab5261d 100644
--- a/test/test_ibeta_inv_ab.hpp
+++ b/test/test_ibeta_inv_ab.hpp
@@ -10,9 +10,10 @@
 #define BOOST_TEST_MAIN
 #include
 #include
+#include
 #include
 #include
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include
 #include
 #include
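All of the GPU tests in this patch accept a device result when it lies within 300 machine epsilons of the serially computed reference, as measured by boost::math::epsilon_difference. A minimal host-only illustration of that acceptance test (the perturbation applied to fake a "device" result is invented for the example):

// Minimal illustration of the 300-eps check used by the GPU tests in this
// patch; the perturbed "device" value below is invented for the example.
#include <iostream>
#include <boost/math/special_functions/beta.hpp>
#include <boost/math/special_functions/relative_difference.hpp>

int main()
{
    double serial   = boost::math::ibeta_inv(2.125, 1.125, 0.125);
    double parallel = serial * (1 + 1e-14); // stand-in for a device result

    // Distance between the two values in multiples of machine epsilon.
    double dist = boost::math::epsilon_difference(serial, parallel);
    std::cout << (dist > 300 ? "FAIL" : "PASS")
              << " (distance: " << dist << " eps)" << std::endl;
}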
diff --git a/test/test_ibeta_inv_double.cu b/test/test_ibeta_inv_double.cu
new file mode 100644
index 0000000000..ef62c5e162
--- /dev/null
+++ b/test/test_ibeta_inv_double.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibeta_inv(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+    // Consolidate the test data:
+    std::vector<float_type> v1, v2, v3;
+
+    for(unsigned i = 0; i < ibeta_data.size(); ++i)
+    {
+        v1.push_back(ibeta_data[i][0]);
+        v2.push_back(ibeta_data[i][1]);
+        v3.push_back(ibeta_data[i][2]);
+    }
+    for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+    {
+        v1.push_back(ibeta_small_data[i][0]);
+        v2.push_back(ibeta_small_data[i][1]);
+        v3.push_back(ibeta_small_data[i][2]);
+    }
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vectors
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+    cuda_managed_ptr<float_type> input_vector3(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        int table_id = i % v1.size();
+        input_vector1[i] = v1[table_id];
+        input_vector2[i] = v2[table_id];
+        input_vector3[i] = v3[table_id];
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::ibeta_inv(input_vector1[i], input_vector2[i], input_vector3[i]));
+    double t = w.elapsed();
+    bool failed = false;
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::isfinite(output_vector[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+            {
+                std::cerr << "Result verification failed at element " << i << "!"
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + failed = true; + } + } + } + + if (failed) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + + diff --git a/test/test_ibeta_inv_float.cu b/test/test_ibeta_inv_float.cu new file mode 100644 index 0000000000..a0d48bfbda --- /dev/null +++ b/test/test_ibeta_inv_float.cu @@ -0,0 +1,149 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// floating-point value does not fit in required floating-point type +#pragma nv_diag_suppress 221 + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ibeta_inv(in1[i], in2[i], in3[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2, v3; + + for(unsigned i = 0; i < ibeta_data.size(); ++i) + { + v1.push_back(ibeta_data[i][0]); + v2.push_back(ibeta_data[i][1]); + v3.push_back(ibeta_data[i][2]); + } + for(unsigned i = 0; i < ibeta_small_data.size(); ++i) + { + v1.push_back(ibeta_small_data[i][0]); + v2.push_back(ibeta_small_data[i][1]); + v3.push_back(ibeta_small_data[i][2]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr input_vector3(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + input_vector3[i] = v3[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + 
err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ibeta_inv(input_vector1[i], input_vector2[i], input_vector3[i])); + double t = w.elapsed(); + bool failed = false; + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::isfinite(output_vector[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + failed = true; + } + } + } + + if (failed) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + + diff --git a/test/test_ibeta_inv_nvrtc_double.cpp b/test/test_ibeta_inv_nvrtc_double.cpp new file mode 100644 index 0000000000..2f01012bbe --- /dev/null +++ b/test/test_ibeta_inv_nvrtc_double.cpp @@ -0,0 +1,207 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_ibeta_inv_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::ibeta_inv(in1[i], in2[i], in3[i]); + } +} +)"; + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, 
cuda_kernel, "test_ibeta_inv_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_ibeta_inv_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ibeta_inv_kernel"), "Failed to get kernel function"); + + int numElements = ibeta_data.size() + ibeta_small_data.size(); + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, *d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + for (int i = 0; i < ibeta_data.size(); ++i) + { + h_in1[i] = ibeta_data[i][0]; + h_in2[i] = ibeta_data[i][1]; + h_in3[i] = ibeta_data[i][2]; + } + for (int i = 0; i < ibeta_small_data.size(); ++i) + { + h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0]; + h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1]; + h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2]; + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::ibeta_inv(h_in1[i], h_in2[i], 
h_in3[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_ibeta_inv_nvrtc_float.cpp b/test/test_ibeta_inv_nvrtc_float.cpp new file mode 100644 index 0000000000..5d804398cb --- /dev/null +++ b/test/test_ibeta_inv_nvrtc_float.cpp @@ -0,0 +1,207 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_ibeta_inv_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::ibeta_inv(in1[i], in2[i], in3[i]); + } +} +)"; + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibeta_inv_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_ibeta_inv_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", 
"--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ibeta_inv_kernel"), "Failed to get kernel function"); + + int numElements = ibeta_data.size() + ibeta_small_data.size(); + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, *d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + for (int i = 0; i < ibeta_data.size(); ++i) + { + h_in1[i] = ibeta_data[i][0]; + h_in2[i] = ibeta_data[i][1]; + h_in3[i] = ibeta_data[i][2]; + } + for (int i = 0; i < ibeta_small_data.size(); ++i) + { + h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0]; + h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1]; + h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2]; + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::ibeta_inv(h_in1[i], h_in2[i], h_in3[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + 
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ibeta_inva_double.cu b/test/test_ibeta_inva_double.cu
new file mode 100644
index 0000000000..7783eb21bb
--- /dev/null
+++ b/test/test_ibeta_inva_double.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibeta_inva(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Consolidate the test data:
+        std::vector<float_type> v1, v2, v3;
+
+        for(unsigned i = 0; i < ibeta_data.size(); ++i)
+        {
+            v1.push_back(ibeta_data[i][0]);
+            v2.push_back(ibeta_data[i][1]);
+            v3.push_back(ibeta_data[i][2]);
+        }
+        for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            v1.push_back(ibeta_small_data[i][0]);
+            v2.push_back(ibeta_small_data[i][1]);
+            v3.push_back(ibeta_small_data[i][2]);
+        }
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vectors
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+        cuda_managed_ptr<float_type> input_vector2(numElements);
+        cuda_managed_ptr<float_type> input_vector3(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        // Initialize the input vectors by cycling through the table data
+        for (int i = 0; i < numElements; ++i)
+        {
+            int table_id = i % v1.size();
+            input_vector1[i] = v1[table_id];
+            input_vector2[i] = v2[table_id];
+            input_vector3[i] = v3[table_id];
+        }
+
+        // Launch the test CUDA kernel: one thread per element
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct by recomputing on the host
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(boost::math::ibeta_inva(input_vector1[i], input_vector2[i], input_vector3[i]));
+        double t = w.elapsed();
+        bool failed = false;
+        // Check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::isfinite(output_vector[i]))
+            {
+                if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+                {
+                    std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                    std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+                    failed = true;
+                }
+            }
+        }
+
+        if (failed)
+        {
+            return EXIT_FAILURE;
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
diff --git a/test/test_ibeta_inva_float.cu b/test/test_ibeta_inva_float.cu
new file mode 100644
index 0000000000..ff918f9436
--- /dev/null
+++ b/test/test_ibeta_inva_float.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibeta_inva(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Consolidate the test data:
+        std::vector<float_type> v1, v2, v3;
+
+        for(unsigned i = 0; i < ibeta_data.size(); ++i)
+        {
+            v1.push_back(ibeta_data[i][0]);
+            v2.push_back(ibeta_data[i][1]);
+            v3.push_back(ibeta_data[i][2]);
+        }
+        for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            v1.push_back(ibeta_small_data[i][0]);
+            v2.push_back(ibeta_small_data[i][1]);
+            v3.push_back(ibeta_small_data[i][2]);
+        }
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vectors
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+        cuda_managed_ptr<float_type> input_vector2(numElements);
+        cuda_managed_ptr<float_type> input_vector3(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        // Initialize the input vectors by cycling through the table data
+        for (int i = 0; i < numElements; ++i)
+        {
+            int table_id = i % v1.size();
+            input_vector1[i] = v1[table_id];
+            input_vector2[i] = v2[table_id];
+            input_vector3[i] = v3[table_id];
+        }
+
+        // Launch the test CUDA kernel: one thread per element
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct by recomputing on the host
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(boost::math::ibeta_inva(input_vector1[i], input_vector2[i], input_vector3[i]));
+        double t = w.elapsed();
+        bool failed = false;
+        // Check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::isfinite(output_vector[i]))
+            {
+                if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+                {
+                    std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                    std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+                    failed = true;
+                }
+            }
+        }
+
+        if (failed)
+        {
+            return EXIT_FAILURE;
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
diff --git a/test/test_ibeta_inva_nvrtc_double.cpp b/test/test_ibeta_inva_nvrtc_double.cpp
new file mode 100644
index 0000000000..a392eaea65
--- /dev/null
+++ b/test/test_ibeta_inva_nvrtc_double.cpp
@@ -0,0 +1,220 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false +typedef double float_type; +#include +#include +extern "C" __global__ +void test_ibeta_inva_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::ibeta_inva(in1[i], in2[i], in3[i]); + } +} +)"; + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibeta_inva_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_ibeta_inva_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ibeta_inva_kernel"), "Failed to get kernel function"); + + int numElements = ibeta_data.size() + ibeta_small_data.size(); + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, 
*d_in3, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_in3 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        for (int i = 0; i < ibeta_data.size(); ++i)
+        {
+            h_in1[i] = ibeta_data[i][0];
+            h_in2[i] = ibeta_data[i][1];
+            h_in3[i] = ibeta_data[i][2];
+        }
+        for (int i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0];
+            h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1];
+            h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2];
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in3");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+        checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify the results against a serial host computation
+        for (int i = 0; i < numElements; ++i)
+        {
+            // Sometimes the ignore_error policy is itself ignored and the call below throws.
+            // Rather than terminating, skip this element and continue through the results array.
+            double res;
+            try
+            {
+                res = boost::math::ibeta_inva(h_in1[i], h_in2[i], h_in3[i]);
+            }
+            catch (...)
+            {
+                continue;
+            }
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_in3);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_in3;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ibeta_inva_nvrtc_float.cpp b/test/test_ibeta_inva_nvrtc_float.cpp
new file mode 100644
index 0000000000..ba5745c321
--- /dev/null
+++ b/test/test_ibeta_inva_nvrtc_float.cpp
@@ -0,0 +1,220 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false +typedef float float_type; +#include +#include +extern "C" __global__ +void test_ibeta_inva_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::ibeta_inva(in1[i], in2[i], in3[i]); + } +} +)"; + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibeta_inva_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_ibeta_inva_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ibeta_inva_kernel"), "Failed to get kernel function"); + + int numElements = ibeta_data.size() + ibeta_small_data.size(); + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, 
*d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + for (int i = 0; i < ibeta_data.size(); ++i) + { + h_in1[i] = ibeta_data[i][0]; + h_in2[i] = ibeta_data[i][1]; + h_in3[i] = ibeta_data[i][2]; + } + for (int i = 0; i < ibeta_small_data.size(); ++i) + { + h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0]; + h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1]; + h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2]; + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + // Sometimes the ignore error policy is ignored so the below throws + // Rather than terminating we can continue to process through our results array + double res; + try + { + res = boost::math::ibeta_inva(h_in1[i], h_in2[i], h_in3[i]); + } + catch (...) + { + continue; + } + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_ibeta_invb_double.cu b/test/test_ibeta_invb_double.cu new file mode 100644 index 0000000000..562f5349dd --- /dev/null +++ b/test/test_ibeta_invb_double.cu @@ -0,0 +1,149 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibeta_invb(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Consolidate the test data:
+        std::vector<float_type> v1, v2, v3;
+
+        for(unsigned i = 0; i < ibeta_data.size(); ++i)
+        {
+            v1.push_back(ibeta_data[i][0]);
+            v2.push_back(ibeta_data[i][1]);
+            v3.push_back(ibeta_data[i][2]);
+        }
+        for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            v1.push_back(ibeta_small_data[i][0]);
+            v2.push_back(ibeta_small_data[i][1]);
+            v3.push_back(ibeta_small_data[i][2]);
+        }
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vectors
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+        cuda_managed_ptr<float_type> input_vector2(numElements);
+        cuda_managed_ptr<float_type> input_vector3(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        // Initialize the input vectors by cycling through the table data
+        for (int i = 0; i < numElements; ++i)
+        {
+            int table_id = i % v1.size();
+            input_vector1[i] = v1[table_id];
+            input_vector2[i] = v2[table_id];
+            input_vector3[i] = v3[table_id];
+        }
+
+        // Launch the test CUDA kernel: one thread per element
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct by recomputing on the host
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(boost::math::ibeta_invb(input_vector1[i], input_vector2[i], input_vector3[i]));
+        double t = w.elapsed();
+        bool failed = false;
+        // Check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::isfinite(output_vector[i]))
+            {
+                if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+                {
+                    std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                    std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+                    failed = true;
+                }
+            }
+        }
+
+        if (failed)
+        {
+            return EXIT_FAILURE;
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
diff --git a/test/test_ibeta_invb_float.cu b/test/test_ibeta_invb_float.cu
new file mode 100644
index 0000000000..86f5615c36
--- /dev/null
+++ b/test/test_ibeta_invb_float.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibeta_invb(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Consolidate the test data:
+        std::vector<float_type> v1, v2, v3;
+
+        for(unsigned i = 0; i < ibeta_data.size(); ++i)
+        {
+            v1.push_back(ibeta_data[i][0]);
+            v2.push_back(ibeta_data[i][1]);
+            v3.push_back(ibeta_data[i][2]);
+        }
+        for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            v1.push_back(ibeta_small_data[i][0]);
+            v2.push_back(ibeta_small_data[i][1]);
+            v3.push_back(ibeta_small_data[i][2]);
+        }
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vectors
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+        cuda_managed_ptr<float_type> input_vector2(numElements);
+        cuda_managed_ptr<float_type> input_vector3(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        // Initialize the input vectors by cycling through the table data
+        for (int i = 0; i < numElements; ++i)
+        {
+            int table_id = i % v1.size();
+            input_vector1[i] = v1[table_id];
+            input_vector2[i] = v2[table_id];
+            input_vector3[i] = v3[table_id];
+        }
+
+        // Launch the test CUDA kernel: one thread per element
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct by recomputing on the host
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(boost::math::ibeta_invb(input_vector1[i], input_vector2[i], input_vector3[i]));
+        double t = w.elapsed();
+        bool failed = false;
+        // Check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::isfinite(output_vector[i]))
+            {
+                if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+                {
+                    std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                    std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+                    failed = true;
+                }
+            }
+        }
+
+        if (failed)
+        {
+            return EXIT_FAILURE;
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
diff --git a/test/test_ibeta_invb_nvrtc_double.cpp b/test/test_ibeta_invb_nvrtc_double.cpp
new file mode 100644
index 0000000000..6f046f09f3
--- /dev/null
+++ b/test/test_ibeta_invb_nvrtc_double.cpp
@@ -0,0 +1,220 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <cmath>
+#include <cstdlib>
+#include <vector>
+#include <exception>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+typedef double float_type;
+#include
+#include
+extern "C" __global__
+void test_ibeta_invb_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibeta_invb(in1[i], in2[i], in3[i]);
+    }
+}
+)";
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibeta_invb_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ibeta_invb_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ibeta_invb_kernel"), "Failed to get kernel function");
+
+        int numElements = ibeta_data.size() + ibeta_small_data.size();
+        float_type *h_in1, *h_in2, *h_in3, *h_out;
+        float_type *d_in1, *d_in2, *d_in3, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_in3 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        for (int i = 0; i < ibeta_data.size(); ++i)
+        {
+            h_in1[i] = ibeta_data[i][0];
+            h_in2[i] = ibeta_data[i][1];
+            h_in3[i] = ibeta_data[i][2];
+        }
+        for (int i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0];
+            h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1];
+            h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2];
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in3");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+        checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        //
Verify Result + for (int i = 0; i < numElements; ++i) + { + // Sometimes the ignore error policy is ignored so the below throws + // Rather than terminating we can continue to process through our results array + double res; + try + { + res = boost::math::ibeta_invb(h_in1[i], h_in2[i], h_in3[i]); + } + catch (...) + { + continue; + } + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_ibeta_invb_nvrtc_float.cpp b/test/test_ibeta_invb_nvrtc_float.cpp new file mode 100644 index 0000000000..f2d17b8447 --- /dev/null +++ b/test/test_ibeta_invb_nvrtc_float.cpp @@ -0,0 +1,220 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false +typedef float float_type; +#include +#include +extern "C" __global__ +void test_ibeta_invb_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::ibeta_invb(in1[i], in2[i], in3[i]); + } +} +)"; + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibeta_invb_kernel.cu", 0, 
nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_ibeta_invb_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ibeta_invb_kernel"), "Failed to get kernel function"); + + int numElements = ibeta_data.size() + ibeta_small_data.size(); + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, *d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + for (int i = 0; i < ibeta_data.size(); ++i) + { + h_in1[i] = ibeta_data[i][0]; + h_in2[i] = ibeta_data[i][1]; + h_in3[i] = ibeta_data[i][2]; + } + for (int i = 0; i < ibeta_small_data.size(); ++i) + { + h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0]; + h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1]; + h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2]; + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + // Sometimes the ignore error policy is ignored so the below throws + // Rather than terminating we can 
continue to process through our results array + double res; + try + { + res = boost::math::ibeta_invb(h_in1[i], h_in2[i], h_in3[i]); + } + catch (...) + { + continue; + } + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_ibeta_nvrtc_double.cpp b/test/test_ibeta_nvrtc_double.cpp new file mode 100644 index 0000000000..bc920b6368 --- /dev/null +++ b/test/test_ibeta_nvrtc_double.cpp @@ -0,0 +1,207 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_ibeta_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::ibeta(in1[i], in2[i], in3[i]); + } +} +)"; + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibeta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_ibeta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", 
"--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ibeta_kernel"), "Failed to get kernel function"); + + int numElements = ibeta_data.size() + ibeta_small_data.size(); + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, *d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + for (int i = 0; i < ibeta_data.size(); ++i) + { + h_in1[i] = ibeta_data[i][0]; + h_in2[i] = ibeta_data[i][1]; + h_in3[i] = ibeta_data[i][2]; + } + for (int i = 0; i < ibeta_small_data.size(); ++i) + { + h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0]; + h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1]; + h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2]; + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::ibeta(h_in1[i], h_in2[i], h_in3[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + 
+ cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_ibeta_nvrtc_float.cpp b/test/test_ibeta_nvrtc_float.cpp new file mode 100644 index 0000000000..ee15748628 --- /dev/null +++ b/test/test_ibeta_nvrtc_float.cpp @@ -0,0 +1,207 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_ibeta_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::ibeta(in1[i], in2[i], in3[i]); + } +} +)"; + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibeta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_ibeta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, 
log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ibeta_kernel"), "Failed to get kernel function"); + + int numElements = ibeta_data.size() + ibeta_small_data.size(); + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, *d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + for (int i = 0; i < ibeta_data.size(); ++i) + { + h_in1[i] = ibeta_data[i][0]; + h_in2[i] = ibeta_data[i][1]; + h_in3[i] = ibeta_data[i][2]; + } + for (int i = 0; i < ibeta_small_data.size(); ++i) + { + h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0]; + h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1]; + h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2]; + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::ibeta(h_in1[i], h_in2[i], h_in3[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." 
<< std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_ibetac_inv_double.cu b/test/test_ibetac_inv_double.cu new file mode 100644 index 0000000000..a983d16677 --- /dev/null +++ b/test/test_ibetac_inv_double.cu @@ -0,0 +1,149 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// floating-point value does not fit in required floating-point type +#pragma nv_diag_suppress 221 + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ibetac_inv(in1[i], in2[i], in3[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2, v3; + + for(unsigned i = 0; i < ibeta_data.size(); ++i) + { + v1.push_back(ibeta_data[i][0]); + v2.push_back(ibeta_data[i][1]); + v3.push_back(ibeta_data[i][2]); + } + for(unsigned i = 0; i < ibeta_small_data.size(); ++i) + { + v1.push_back(ibeta_small_data[i][0]); + v2.push_back(ibeta_small_data[i][1]); + v3.push_back(ibeta_small_data[i][2]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr input_vector3(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + input_vector3[i] = v3[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ibetac_inv(input_vector1[i], input_vector2[i], input_vector3[i])); + double t = w.elapsed(); + bool failed = false; + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::isfinite(output_vector[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + failed = true; + } + } + } + + if (failed) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + + diff --git a/test/test_ibetac_inv_float.cu b/test/test_ibetac_inv_float.cu new file mode 100644 index 0000000000..94583b45e2 --- /dev/null +++ b/test/test_ibetac_inv_float.cu @@ -0,0 +1,149 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// floating-point value does not fit in required floating-point type +#pragma nv_diag_suppress 221 + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ibetac_inv(in1[i], in2[i], in3[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2, v3; + + for(unsigned i = 0; i < ibeta_data.size(); ++i) + { + v1.push_back(ibeta_data[i][0]); + v2.push_back(ibeta_data[i][1]); + v3.push_back(ibeta_data[i][2]); + } + for(unsigned i = 0; i < ibeta_small_data.size(); ++i) + { + v1.push_back(ibeta_small_data[i][0]); + v2.push_back(ibeta_small_data[i][1]); + v3.push_back(ibeta_small_data[i][2]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr input_vector3(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % 
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(boost::math::ibetac_inv(input_vector1[i], input_vector2[i], input_vector3[i]));
+        double t = w.elapsed();
+        bool failed = false;
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::isfinite(output_vector[i]))
+            {
+                if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+                {
+                    std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                    std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                    failed = true;
+                }
+            }
+        }
+
+        if (failed)
+        {
+            return EXIT_FAILURE;
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
diff --git a/test/test_ibetac_inv_float.cu b/test/test_ibetac_inv_float.cu
new file mode 100644
index 0000000000..94583b45e2
--- /dev/null
+++ b/test/test_ibetac_inv_float.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_inv(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Consolidate the test data:
+        std::vector<float_type> v1, v2, v3;
+
+        for(unsigned i = 0; i < ibeta_data.size(); ++i)
+        {
+            v1.push_back(ibeta_data[i][0]);
+            v2.push_back(ibeta_data[i][1]);
+            v3.push_back(ibeta_data[i][2]);
+        }
+        for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            v1.push_back(ibeta_small_data[i][0]);
+            v2.push_back(ibeta_small_data[i][1]);
+            v3.push_back(ibeta_small_data[i][2]);
+        }
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vectors
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+        cuda_managed_ptr<float_type> input_vector2(numElements);
+        cuda_managed_ptr<float_type> input_vector3(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        // Initialize the input vectors by cycling through the table data
+        for (int i = 0; i < numElements; ++i)
+        {
+            int table_id = i % v1.size();
+            input_vector1[i] = v1[table_id];
+            input_vector2[i] = v2[table_id];
+            input_vector3[i] = v3[table_id];
+        }
+
+        // Launch the test CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(boost::math::ibetac_inv(input_vector1[i], input_vector2[i], input_vector3[i]));
+        double t = w.elapsed();
+        bool failed = false;
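+        // Compare the GPU results against the serial Boost.Math values with
+        // a fairly loose 300 eps tolerance; device code may use different
+        // math intrinsics and rounding than the host build.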
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::isfinite(output_vector[i]))
+            {
+                if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+                {
+                    std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                    std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                    failed = true;
+                }
+            }
+        }
+
+        if (failed)
+        {
+            return EXIT_FAILURE;
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
diff --git a/test/test_ibetac_inv_nvrtc_double.cpp b/test/test_ibetac_inv_nvrtc_double.cpp
new file mode 100644
index 0000000000..a99d53b3cd
--- /dev/null
+++ b/test/test_ibetac_inv_nvrtc_double.cpp
@@ -0,0 +1,207 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/beta.hpp>
+extern "C" __global__
+void test_ibetac_inv_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_inv(in1[i], in2[i], in3[i]);
+    }
+}
+)";
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibetac_inv_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ibetac_inv_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
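+        // NVRTC pipeline so far: kernel source string -> nvrtcProgram -> PTX.
+        // The driver API now JIT-loads that PTX and returns a CUfunction
+        // handle that can be launched like a precompiled kernel.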
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ibetac_inv_kernel"), "Failed to get kernel function");
+
+        int numElements = ibeta_data.size() + ibeta_small_data.size();
+        float_type *h_in1, *h_in2, *h_in3, *h_out;
+        float_type *d_in1, *d_in2, *d_in3, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_in3 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        for (int i = 0; i < ibeta_data.size(); ++i)
+        {
+            h_in1[i] = ibeta_data[i][0];
+            h_in2[i] = ibeta_data[i][1];
+            h_in3[i] = ibeta_data[i][2];
+        }
+        for (int i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0];
+            h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1];
+            h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2];
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in3");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+        checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ibetac_inv(h_in1[i], h_in2[i], h_in3[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_in3);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_in3;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ibetac_inv_nvrtc_float.cpp b/test/test_ibetac_inv_nvrtc_float.cpp
new file mode 100644
index 0000000000..47e89db4c1
--- /dev/null
+++ b/test/test_ibetac_inv_nvrtc_float.cpp
@@ -0,0 +1,207 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/beta.hpp>
+extern "C" __global__
+void test_ibetac_inv_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_inv(in1[i], in2[i], in3[i]);
+    }
+}
+)";
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibetac_inv_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ibetac_inv_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ibetac_inv_kernel"), "Failed to get kernel function");
+
+        int numElements = ibeta_data.size() + ibeta_small_data.size();
+        float_type *h_in1, *h_in2, *h_in3, *h_out;
+        float_type *d_in1, *d_in2, *d_in3, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_in3 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        for (int i = 0; i < ibeta_data.size(); ++i)
+        {
+            h_in1[i] = ibeta_data[i][0];
+            h_in2[i] = ibeta_data[i][1];
+            h_in3[i] = ibeta_data[i][2];
+        }
+        for (int i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0];
+            h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1];
+            h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2];
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in3");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+        checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements };
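+        // cuLaunchKernel takes the kernel parameters as an array of pointers
+        // to the arguments, so args holds the addresses of the device
+        // pointers and of numElements, not their values.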
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ibetac_inv(h_in1[i], h_in2[i], h_in3[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_in3);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_in3;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ibetac_inva_double.cu b/test/test_ibetac_inva_double.cu
new file mode 100644
index 0000000000..2efbee265d
--- /dev/null
+++ b/test/test_ibetac_inva_double.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_inva(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Consolidate the test data:
+        std::vector<float_type> v1, v2, v3;
+
+        for(unsigned i = 0; i < ibeta_data.size(); ++i)
+        {
+            v1.push_back(ibeta_data[i][0]);
+            v2.push_back(ibeta_data[i][1]);
+            v3.push_back(ibeta_data[i][2]);
+        }
+        for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            v1.push_back(ibeta_small_data[i][0]);
+            v2.push_back(ibeta_small_data[i][1]);
+            v3.push_back(ibeta_small_data[i][2]);
+        }
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vectors
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+        cuda_managed_ptr<float_type> input_vector2(numElements);
+        cuda_managed_ptr<float_type> input_vector3(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
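+        // cuda_managed_ptr is a local test helper; it appears to hand out
+        // CUDA managed (unified) memory, so the same buffers can be filled
+        // and read by the host loops below and passed straight to the kernel.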
+        // Initialize the input vectors by cycling through the table data
+        for (int i = 0; i < numElements; ++i)
+        {
+            int table_id = i % v1.size();
+            input_vector1[i] = v1[table_id];
+            input_vector2[i] = v2[table_id];
+            input_vector3[i] = v3[table_id];
+        }
+
+        // Launch the test CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(boost::math::ibetac_inva(input_vector1[i], input_vector2[i], input_vector3[i]));
+        double t = w.elapsed();
+        bool failed = false;
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::isfinite(output_vector[i]))
+            {
+                if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+                {
+                    std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                    std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                    failed = true;
+                }
+            }
+        }
+
+        if (failed)
+        {
+            return EXIT_FAILURE;
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
diff --git a/test/test_ibetac_inva_float.cu b/test/test_ibetac_inva_float.cu
new file mode 100644
index 0000000000..9bd1a29a07
--- /dev/null
+++ b/test/test_ibetac_inva_float.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_inva(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Consolidate the test data:
+        std::vector<float_type> v1, v2, v3;
+
+        for(unsigned i = 0; i < ibeta_data.size(); ++i)
+        {
+            v1.push_back(ibeta_data[i][0]);
+            v2.push_back(ibeta_data[i][1]);
+            v3.push_back(ibeta_data[i][2]);
+        }
+        for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            v1.push_back(ibeta_small_data[i][0]);
+            v2.push_back(ibeta_small_data[i][1]);
+            v3.push_back(ibeta_small_data[i][2]);
+        }
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vectors
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+        cuda_managed_ptr<float_type> input_vector2(numElements);
+        cuda_managed_ptr<float_type> input_vector3(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        // Initialize the input vectors by cycling through the table data
+        for (int i = 0; i < numElements; ++i)
+        {
+            int table_id = i % v1.size();
+            input_vector1[i] = v1[table_id];
+            input_vector2[i] = v2[table_id];
+            input_vector3[i] = v3[table_id];
+        }
+
+        // Launch the test CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(boost::math::ibetac_inva(input_vector1[i], input_vector2[i], input_vector3[i]));
+        double t = w.elapsed();
+        bool failed = false;
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::isfinite(output_vector[i]))
+            {
+                if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+                {
+                    std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                    std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                    failed = true;
+                }
+            }
+        }
+
+        if (failed)
+        {
+            return EXIT_FAILURE;
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
diff --git a/test/test_ibetac_inva_nvrtc_double.cpp b/test/test_ibetac_inva_nvrtc_double.cpp
new file mode 100644
index 0000000000..7c7bf992b3
--- /dev/null
+++ b/test/test_ibetac_inva_nvrtc_double.cpp
@@ -0,0 +1,220 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
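+// These policy macros make Boost.Math signal overflow by returning a value
+// rather than throwing, and stop double results from being internally
+// promoted to long double, which CUDA device code does not support.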
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/beta.hpp>
+extern "C" __global__
+void test_ibetac_inva_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_inva(in1[i], in2[i], in3[i]);
+    }
+}
+)";
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibetac_inva_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ibetac_inva_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ibetac_inva_kernel"), "Failed to get kernel function");
+
+        int numElements = ibeta_data.size() + ibeta_small_data.size();
+        float_type *h_in1, *h_in2, *h_in3, *h_out;
+        float_type *d_in1, *d_in2, *d_in3, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_in3 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        for (int i = 0; i < ibeta_data.size(); ++i)
+        {
+            h_in1[i] = ibeta_data[i][0];
+            h_in2[i] = ibeta_data[i][1];
+            h_in3[i] = ibeta_data[i][2];
+        }
+        for (int i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0];
+            h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1];
+            h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2];
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in3");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+        checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            // Sometimes the ignore error policy is ignored so the below throws
+            // Rather than terminating we can continue to process through our results array
+            double res;
+            try
+            {
+                res = boost::math::ibetac_inva(h_in1[i], h_in2[i], h_in3[i]);
+            }
+            catch (...)
+            {
+                continue;
+            }
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_in3);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_in3;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ibetac_inva_nvrtc_float.cpp b/test/test_ibetac_inva_nvrtc_float.cpp
new file mode 100644
index 0000000000..c79b8b02f1
--- /dev/null
+++ b/test/test_ibetac_inva_nvrtc_float.cpp
@@ -0,0 +1,220 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/beta.hpp>
+extern "C" __global__
+void test_ibetac_inva_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_inva(in1[i], in2[i], in3[i]);
+    }
+}
+)";
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibetac_inva_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ibetac_inva_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ibetac_inva_kernel"), "Failed to get kernel function");
+
+        int numElements = ibeta_data.size() + ibeta_small_data.size();
+        float_type *h_in1, *h_in2, *h_in3, *h_out;
+        float_type *d_in1, *d_in2, *d_in3, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_in3 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        for (int i = 0; i < ibeta_data.size(); ++i)
+        {
+            h_in1[i] = ibeta_data[i][0];
+            h_in2[i] = ibeta_data[i][1];
+            h_in3[i] = ibeta_data[i][2];
+        }
+        for (int i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0];
+            h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1];
+            h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2];
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in3");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+        checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            // Sometimes the ignore error policy is ignored so the below throws
+            // Rather than terminating we can continue to process through our results array
+            double res;
+            try
+            {
+                res = boost::math::ibetac_inva(h_in1[i], h_in2[i], h_in3[i]);
+            }
+            catch (...)
+            {
+                continue;
+            }
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_in3);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_in3;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ibetac_invb_double.cu b/test/test_ibetac_invb_double.cu
new file mode 100644
index 0000000000..fddd655af2
--- /dev/null
+++ b/test/test_ibetac_invb_double.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_invb(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Consolidate the test data:
+        std::vector<float_type> v1, v2, v3;
+
+        for(unsigned i = 0; i < ibeta_data.size(); ++i)
+        {
+            v1.push_back(ibeta_data[i][0]);
+            v2.push_back(ibeta_data[i][1]);
+            v3.push_back(ibeta_data[i][2]);
+        }
+        for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            v1.push_back(ibeta_small_data[i][0]);
+            v2.push_back(ibeta_small_data[i][1]);
+            v3.push_back(ibeta_small_data[i][2]);
+        }
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vectors
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+        cuda_managed_ptr<float_type> input_vector2(numElements);
+        cuda_managed_ptr<float_type> input_vector3(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        // Initialize the input vectors by cycling through the table data
+        for (int i = 0; i < numElements; ++i)
+        {
+            int table_id = i % v1.size();
+            input_vector1[i] = v1[table_id];
+            input_vector2[i] = v2[table_id];
+            input_vector3[i] = v3[table_id];
+        }
+
+        // Launch the test CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(boost::math::ibetac_invb(input_vector1[i], input_vector2[i], input_vector3[i]));
+        double t = w.elapsed();
+        bool failed = false;
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::isfinite(output_vector[i]))
+            {
+                if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+                {
+                    std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                    std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                    failed = true;
+                }
+            }
+        }
+
+        if (failed)
+        {
+            return EXIT_FAILURE;
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
diff --git a/test/test_ibetac_invb_float.cu b/test/test_ibetac_invb_float.cu
new file mode 100644
index 0000000000..fddd655af2
--- /dev/null
+++ b/test/test_ibetac_invb_float.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_invb(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Consolidate the test data:
+        std::vector<float_type> v1, v2, v3;
+
+        for(unsigned i = 0; i < ibeta_data.size(); ++i)
+        {
+            v1.push_back(ibeta_data[i][0]);
+            v2.push_back(ibeta_data[i][1]);
+            v3.push_back(ibeta_data[i][2]);
+        }
+        for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            v1.push_back(ibeta_small_data[i][0]);
+            v2.push_back(ibeta_small_data[i][1]);
+            v3.push_back(ibeta_small_data[i][2]);
+        }
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vectors
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+        cuda_managed_ptr<float_type> input_vector2(numElements);
+        cuda_managed_ptr<float_type> input_vector3(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        // Initialize the input vectors by cycling through the table data
+        for (int i = 0; i < numElements; ++i)
+        {
+            int table_id = i % v1.size();
+            input_vector1[i] = v1[table_id];
+            input_vector2[i] = v2[table_id];
+            input_vector3[i] = v3[table_id];
+        }
+
+        // Launch the test CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(boost::math::ibetac_invb(input_vector1[i], input_vector2[i], input_vector3[i]));
+        double t = w.elapsed();
+        bool failed = false;
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::isfinite(output_vector[i]))
+            {
+                if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+                {
+                    std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                    std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                    failed = true;
+                }
+            }
+        }
+
+        if (failed)
+        {
+            return EXIT_FAILURE;
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
diff --git a/test/test_ibetac_invb_nvrtc_double.cpp b/test/test_ibetac_invb_nvrtc_double.cpp
new file mode 100644
index 0000000000..76f6318901
--- /dev/null
+++ b/test/test_ibetac_invb_nvrtc_double.cpp
@@ -0,0 +1,220 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/beta.hpp>
+extern "C" __global__
+void test_ibetac_invb_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_invb(in1[i], in2[i], in3[i]);
+    }
+}
+)";
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibetac_invb_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ibetac_invb_kernel");
+
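+        // Under BOOST_MATH_NVRTC_CI_RUN the target architecture is pinned to
+        // compute_75 and the include path matches the CI checkout layout;
+        // the fallback include path below is developer-machine specific.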
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ibetac_invb_kernel"), "Failed to get kernel function");
+
+        int numElements = ibeta_data.size() + ibeta_small_data.size();
+        float_type *h_in1, *h_in2, *h_in3, *h_out;
+        float_type *d_in1, *d_in2, *d_in3, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_in3 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        for (int i = 0; i < ibeta_data.size(); ++i)
+        {
+            h_in1[i] = ibeta_data[i][0];
+            h_in2[i] = ibeta_data[i][1];
+            h_in3[i] = ibeta_data[i][2];
+        }
+        for (int i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0];
+            h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1];
+            h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2];
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in3");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+        checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            // Sometimes the ignore error policy is ignored so the below throws
+            // Rather than terminating we can continue to process through our results array
+            double res;
+            try
+            {
+                res = boost::math::ibetac_invb(h_in1[i], h_in2[i], h_in3[i]);
+            }
+            catch (...)
+            {
+                continue;
+            }
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_in3);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_in3;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ibetac_invb_nvrtc_float.cpp b/test/test_ibetac_invb_nvrtc_float.cpp
new file mode 100644
index 0000000000..48d0a31eec
--- /dev/null
+++ b/test/test_ibetac_invb_nvrtc_float.cpp
@@ -0,0 +1,220 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/beta.hpp>
+extern "C" __global__
+void test_ibetac_invb_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_invb(in1[i], in2[i], in3[i]);
+    }
+}
+)";
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibetac_invb_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ibetac_invb_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
"--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ibetac_invb_kernel"), "Failed to get kernel function"); + + int numElements = ibeta_data.size() + ibeta_small_data.size(); + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, *d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + for (int i = 0; i < ibeta_data.size(); ++i) + { + h_in1[i] = ibeta_data[i][0]; + h_in2[i] = ibeta_data[i][1]; + h_in3[i] = ibeta_data[i][2]; + } + for (int i = 0; i < ibeta_small_data.size(); ++i) + { + h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0]; + h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1]; + h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2]; + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + // Sometimes the ignore error policy is ignored so the below throws + // Rather than terminating we can continue to process through our results array + double res; + try + { + res = boost::math::ibetac_invb(h_in1[i], h_in2[i], h_in3[i]); + } + catch (...) 
+ { + continue; + } + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_ibetac_nvrtc_double.cpp b/test/test_ibetac_nvrtc_double.cpp new file mode 100644 index 0000000000..6a59473e18 --- /dev/null +++ b/test/test_ibetac_nvrtc_double.cpp @@ -0,0 +1,207 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_ibetac_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::ibetac(in1[i], in2[i], in3[i]); + } +} +)"; + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibetac_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_ibetac_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", 
"--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ibetac_kernel"), "Failed to get kernel function"); + + int numElements = ibeta_data.size() + ibeta_small_data.size(); + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, *d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + for (int i = 0; i < ibeta_data.size(); ++i) + { + h_in1[i] = ibeta_data[i][0]; + h_in2[i] = ibeta_data[i][1]; + h_in3[i] = ibeta_data[i][2]; + } + for (int i = 0; i < ibeta_small_data.size(); ++i) + { + h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0]; + h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1]; + h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2]; + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::ibetac(h_in1[i], h_in2[i], h_in3[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + 
nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_ibetac_nvrtc_float.cpp b/test/test_ibetac_nvrtc_float.cpp new file mode 100644 index 0000000000..a989191e51 --- /dev/null +++ b/test/test_ibetac_nvrtc_float.cpp @@ -0,0 +1,207 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_ibetac_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::ibetac(in1[i], in2[i], in3[i]); + } +} +)"; + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibetac_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_ibetac_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + 
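+        // NVRTC hands the generated PTX back in two steps: first query the buffer
+        // size (which includes the trailing NUL), then copy the PTX text out.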
size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ibetac_kernel"), "Failed to get kernel function"); + + int numElements = ibeta_data.size() + ibeta_small_data.size(); + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, *d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + for (int i = 0; i < ibeta_data.size(); ++i) + { + h_in1[i] = ibeta_data[i][0]; + h_in2[i] = ibeta_data[i][1]; + h_in3[i] = ibeta_data[i][2]; + } + for (int i = 0; i < ibeta_small_data.size(); ++i) + { + h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0]; + h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1]; + h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2]; + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::ibetac(h_in1[i], h_in2[i], h_in3[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_igamma.cpp b/test/test_igamma.cpp index 8e80c772c4..6e034f3c60 100644 --- a/test/test_igamma.cpp +++ b/test/test_igamma.cpp @@ -3,7 +3,18 @@ // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef SYCL_LANGUAGE_VERSION #include +#endif + +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wliteral-range" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Woverflow" +#endif + #include "test_igamma.hpp" // diff --git a/test/test_igamma.hpp b/test/test_igamma.hpp index b434f727ee..bfe386d4de 100644 --- a/test/test_igamma.hpp +++ b/test/test_igamma.hpp @@ -8,11 +8,12 @@ #include #include +#include #define BOOST_TEST_MAIN #include #include +#include "../include_private/boost/math/tools/test.hpp" #include -#include #include #include #include diff --git a/test/test_igamma_inv.cpp b/test/test_igamma_inv.cpp index eafed0e1da..80a553427c 100644 --- a/test/test_igamma_inv.cpp +++ b/test/test_igamma_inv.cpp @@ -3,7 +3,18 @@ // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef SYCL_LANGUAGE_VERSION #include +#endif + +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wliteral-range" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Woverflow" +#endif +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error #include "test_igamma_inv.hpp" #if !defined(TEST_FLOAT) && !defined(TEST_DOUBLE) && !defined(TEST_LDOUBLE) && !defined(TEST_REAL_CONCEPT) @@ -89,14 +100,22 @@ void expected_results() "linux.*", // platform largest_type, // test type(s) "[^|]*medium[^|]*", // test data group + #ifdef SYCL_LANGUAGE_VERSION + "[^|]*", 350, 50); + #else "[^|]*", 350, 5); // test function + #endif add_expected_result( "[^|]*", // compiler "[^|]*", // stdlib "linux.*", // platform largest_type, // test type(s) "[^|]*large[^|]*", // test data group + #ifdef SYCL_LANGUAGE_VERSION + "[^|]*", 150, 20); // test function + #else "[^|]*", 150, 5); // test function + #endif // diff --git a/test/test_igamma_inv.hpp b/test/test_igamma_inv.hpp index 7330e918a7..cf481537e7 100644 --- a/test/test_igamma_inv.hpp +++ b/test/test_igamma_inv.hpp @@ -6,13 +6,14 @@ #include #include +#include #define BOOST_TEST_MAIN #include #include #include #include #include -#include +#include "../include_private/boost/math/tools/test.hpp" #include #include #include diff --git a/test/test_igamma_inva.cpp b/test/test_igamma_inva.cpp index 047df11735..443ad7bbc6 100644 --- a/test/test_igamma_inva.cpp +++ b/test/test_igamma_inva.cpp @@ -3,7 +3,18 @@ // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef SYCL_LANGUAGE_VERSION #include +#endif + +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wliteral-range" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Woverflow" +#endif + #include "test_igamma_inva.hpp" #if !defined(TEST_FLOAT) && !defined(TEST_DOUBLE) && !defined(TEST_LDOUBLE) && !defined(TEST_REAL_CONCEPT) diff --git a/test/test_igamma_inva.hpp b/test/test_igamma_inva.hpp index 402ea2f8bc..d9d317da15 100644 --- a/test/test_igamma_inva.hpp +++ b/test/test_igamma_inva.hpp @@ -8,13 +8,14 @@ #include #include +#include #define BOOST_TEST_MAIN #include #include #include #include #include -#include +#include "../include_private/boost/math/tools/test.hpp" #include #include #include diff --git a/test/test_inverse_chi_squared_cdf_double.cu b/test/test_inverse_chi_squared_cdf_double.cu new file mode 100644 index 0000000000..9703e7a3a0 --- /dev/null +++ b/test/test_inverse_chi_squared_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_chi_squared_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch inverse_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::inverse_chi_squared_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_inverse_chi_squared_cdf_float.cu b/test/test_inverse_chi_squared_cdf_float.cu new file mode 100644 index 0000000000..bb56a48728 --- /dev/null +++ b/test/test_inverse_chi_squared_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_chi_squared_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch inverse_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::inverse_chi_squared_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_inverse_chi_squared_cdf_nvrtc_double.cpp b/test/test_inverse_chi_squared_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..b221aedaab --- /dev/null +++ b/test/test_inverse_chi_squared_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_inverse_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_chi_squared_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_chi_squared_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_inverse_chi_squared_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", 
"-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_chi_squared_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::inverse_chi_squared_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." 
<< std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_inverse_chi_squared_cdf_nvrtc_float.cpp b/test/test_inverse_chi_squared_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..743654c149 --- /dev/null +++ b/test/test_inverse_chi_squared_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_inverse_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_chi_squared_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_chi_squared_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_inverse_chi_squared_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + 
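+        // cuModuleLoadDataEx JIT-compiles the PTX for the device in the current
+        // context; the kernel is then looked up by its unmangled extern "C" name.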
checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_chi_squared_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::inverse_chi_squared_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_inverse_chi_squared_distribution.cpp b/test/test_inverse_chi_squared_distribution.cpp index a697824185..cbc9dcf191 100644 --- a/test/test_inverse_chi_squared_distribution.cpp +++ b/test/test_inverse_chi_squared_distribution.cpp @@ -14,11 +14,14 @@ // http://www.wolframalpha.com/input/?i=inverse+chisquare+distribution -#include +#include +#include "../include_private/boost/math/tools/test.hpp" + +#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS #include // for real_concept using ::boost::math::concepts::real_concept; +#endif -//#include #define BOOST_TEST_MAIN #include // for test_main #include // for BOOST_CHECK_CLOSE_FRACTION diff --git a/test/test_inverse_chi_squared_pdf_double.cu b/test/test_inverse_chi_squared_pdf_double.cu new file mode 100644 index 0000000000..f306117493 --- /dev/null +++ b/test/test_inverse_chi_squared_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. 
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::inverse_chi_squared_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vector
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch inverse_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(pdf(boost::math::inverse_chi_squared_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // Check the results: tolerate up to 100 eps of device/host disagreement
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_inverse_chi_squared_pdf_float.cu b/test/test_inverse_chi_squared_pdf_float.cu
new file mode 100644
index 0000000000..8a3d1c1ef3
--- /dev/null
+++ b/test/test_inverse_chi_squared_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::inverse_chi_squared_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vector
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch inverse_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(pdf(boost::math::inverse_chi_squared_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // Check the results: tolerate up to 100 eps of device/host disagreement
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_inverse_chi_squared_pdf_nvrtc_double.cpp b/test/test_inverse_chi_squared_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..4608b3bd62
--- /dev/null
+++ b/test/test_inverse_chi_squared_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_inverse_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::inverse_chi_squared_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_chi_squared_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_inverse_chi_squared_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_chi_squared_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::inverse_chi_squared_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_inverse_chi_squared_pdf_nvrtc_float.cpp b/test/test_inverse_chi_squared_pdf_nvrtc_float.cpp new file mode 100644 index 0000000000..8b4db55c0a --- /dev/null +++ b/test/test_inverse_chi_squared_pdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_inverse_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::inverse_chi_squared_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_chi_squared_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_inverse_chi_squared_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_chi_squared_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::inverse_chi_squared_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_inverse_chi_squared_quan_double.cu b/test/test_inverse_chi_squared_quan_double.cu new file mode 100644 index 0000000000..f9022c6a32 --- /dev/null +++ b/test/test_inverse_chi_squared_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::inverse_chi_squared_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vector
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch inverse_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::inverse_chi_squared_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // Check the results: tolerate up to 100 eps of device/host disagreement
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_inverse_chi_squared_quan_float.cu b/test/test_inverse_chi_squared_quan_float.cu
new file mode 100644
index 0000000000..10aa6d7075
--- /dev/null
+++ b/test/test_inverse_chi_squared_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
diff --git a/test/test_inverse_chi_squared_quan_float.cu b/test/test_inverse_chi_squared_quan_float.cu new file mode 100644 index 0000000000..10aa6d7075 --- /dev/null +++ b/test/test_inverse_chi_squared_quan_float.cu @@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/inverse_chi_squared.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::inverse_chi_squared_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch inverse_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::inverse_chi_squared_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
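The *_nvrtc_* translation units that follow exercise a different compilation path: instead of compiling the kernel offline with nvcc, they hand the kernel source (held in the cuda_kernel string) to NVRTC at run time, load the resulting PTX through the CUDA driver API, and look the kernel up by name. Condensed to its essentials, the round-trip every one of these tests repeats looks roughly like this (error handling and compile-log retrieval elided; assumes cuInit and cuCtxCreate have already run, as they do in the tests):

    #include <nvrtc.h>
    #include <cuda.h>
    #include <vector>

    // Source string -> PTX -> loaded CUfunction, found by extern "C" name.
    CUfunction compile_kernel(const char* source, const char* name)
    {
        nvrtcProgram prog;
        nvrtcCreateProgram(&prog, source, "kernel.cu", 0, nullptr, nullptr);

        const char* opts[] = {"--std=c++14"};  // plus -I paths, as in the tests
        nvrtcCompileProgram(prog, 1, opts);    // on failure, fetch the program log

        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        std::vector<char> ptx(ptx_size);
        nvrtcGetPTX(prog, ptx.data());
        nvrtcDestroyProgram(&prog);

        CUmodule module;
        CUfunction kernel;
        cuModuleLoadDataEx(&module, ptx.data(), 0, nullptr, nullptr);
        cuModuleGetFunction(&kernel, module, name);
        return kernel;
    }

The extern "C" on the kernel matters here: it keeps the symbol unmangled so cuModuleGetFunction can find it by the literal name string.
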
diff --git a/test/test_inverse_chi_squared_quan_nvrtc_double.cpp b/test/test_inverse_chi_squared_quan_nvrtc_double.cpp new file mode 100644 index 0000000000..0f8a9a5f82 --- /dev/null +++ b/test/test_inverse_chi_squared_quan_nvrtc_double.cpp @@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/inverse_chi_squared.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstring>
+#include <cstdio>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/inverse_chi_squared.hpp>
+extern "C" __global__
+void test_inverse_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::inverse_chi_squared_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_chi_squared_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_inverse_chi_squared_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_chi_squared_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::inverse_chi_squared_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
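One driver-API detail worth calling out before the float twin of this test: cuLaunchKernel receives its kernel arguments as an array of pointers to the argument values, in declaration order, which is why even the unused const float_type* second parameter keeps a slot so that the later arguments line up. The same call as in the tests, annotated:

    void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
    cuLaunchKernel(kernel,
                   numBlocks, 1, 1,   // grid dimensions (x, y, z)
                   blockSize, 1, 1,   // block dimensions (x, y, z)
                   0,                 // dynamic shared memory, in bytes
                   0,                 // stream; 0 = default stream
                   args,              // kernel parameters
                   0);                // "extra" launch options, unused here

The launch itself is asynchronous; these tests rely on the subsequent cudaMemcpy back to the host, which synchronizes with the default stream before d_out is read.
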
diff --git a/test/test_inverse_chi_squared_quan_nvrtc_float.cpp b/test/test_inverse_chi_squared_quan_nvrtc_float.cpp new file mode 100644 index 0000000000..ab494a8da3 --- /dev/null +++ b/test/test_inverse_chi_squared_quan_nvrtc_float.cpp @@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_inverse_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::inverse_chi_squared_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_chi_squared_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_inverse_chi_squared_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_chi_squared_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::inverse_chi_squared_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_inverse_gamma_cdf_double.cu b/test/test_inverse_gamma_cdf_double.cu new file mode 100644 index 0000000000..4368a2284b --- /dev/null +++ b/test/test_inverse_gamma_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_gamma_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch inverse_gamma distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::inverse_gamma_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_inverse_gamma_cdf_float.cu b/test/test_inverse_gamma_cdf_float.cu new file mode 100644 index 0000000000..cef2ec955f --- /dev/null +++ b/test/test_inverse_gamma_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_gamma_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch inverse_gamma distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::inverse_gamma_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_inverse_gamma_cdf_nvrtc_double.cpp b/test/test_inverse_gamma_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..c5a4b9878a --- /dev/null +++ b/test/test_inverse_gamma_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_inverse_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_gamma_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_inverse_gamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] 
= static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::inverse_gamma_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_inverse_gamma_cdf_nvrtc_float.cpp b/test/test_inverse_gamma_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..d76d512256 --- /dev/null +++ b/test/test_inverse_gamma_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_inverse_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_gamma_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_inverse_gamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::inverse_gamma_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_inverse_gamma_distribution.cpp b/test/test_inverse_gamma_distribution.cpp index 68b238fbc8..436131d83f 100644 --- a/test/test_inverse_gamma_distribution.cpp +++ b/test/test_inverse_gamma_distribution.cpp @@ -14,11 +14,14 @@ # pragma warning (disable : 4310) // cast truncates constant value #endif -#include +#include +#include "../include_private/boost/math/tools/test.hpp" + +#ifndef BOOST_MATH_HAS_GPU_SUPPORT #include // for real_concept using ::boost::math::concepts::real_concept; +#endif -//#include #define BOOST_TEST_MAIN #include // for test_main #include // for BOOST_CHECK_CLOSE_FRACTION diff --git a/test/test_inverse_gamma_pdf_double.cu b/test/test_inverse_gamma_pdf_double.cu new file mode 100644 index 0000000000..fa5073dbe0 --- /dev/null +++ b/test/test_inverse_gamma_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::inverse_gamma_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch inverse_gamma distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::inverse_gamma_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_inverse_gamma_pdf_float.cu b/test/test_inverse_gamma_pdf_float.cu new file mode 100644 index 0000000000..c2d80fe8d5 --- /dev/null +++ b/test/test_inverse_gamma_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::inverse_gamma_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch inverse_gamma distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::inverse_gamma_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_inverse_gamma_pdf_nvrtc_double.cpp b/test/test_inverse_gamma_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..db2c8c4e19 --- /dev/null +++ b/test/test_inverse_gamma_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_inverse_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::inverse_gamma_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_inverse_gamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] 
= static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::inverse_gamma_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_inverse_gamma_pdf_nvrtc_float.cpp b/test/test_inverse_gamma_pdf_nvrtc_float.cpp new file mode 100644 index 0000000000..4d552cf619 --- /dev/null +++ b/test/test_inverse_gamma_pdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_inverse_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::inverse_gamma_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_inverse_gamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::inverse_gamma_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_inverse_gamma_quan_double.cu b/test/test_inverse_gamma_quan_double.cu new file mode 100644 index 0000000000..c9095d7527 --- /dev/null +++ b/test/test_inverse_gamma_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::inverse_gamma_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch inverse_gamma distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::inverse_gamma_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_inverse_gamma_quan_float.cu b/test/test_inverse_gamma_quan_float.cu new file mode 100644 index 0000000000..3e60feaa18 --- /dev/null +++ b/test/test_inverse_gamma_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::inverse_gamma_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch inverse_gamma distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::inverse_gamma_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_inverse_gamma_quan_nvrtc_double.cpp b/test/test_inverse_gamma_quan_nvrtc_double.cpp new file mode 100644 index 0000000000..a49600bde1 --- /dev/null +++ b/test/test_inverse_gamma_quan_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/inverse_gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/inverse_gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_inverse_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::inverse_gamma_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gamma_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_inverse_gamma_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gamma_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::inverse_gamma_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_inverse_gamma_quan_nvrtc_float.cpp b/test/test_inverse_gamma_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..f71ed964aa
--- /dev/null
+++ b/test/test_inverse_gamma_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/inverse_gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/inverse_gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_inverse_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::inverse_gamma_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gamma_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_inverse_gamma_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gamma_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::inverse_gamma_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_inverse_gaussian.cpp b/test/test_inverse_gaussian.cpp
index 68012d48a2..3825da9397 100644
--- a/test/test_inverse_gaussian.cpp
+++ b/test/test_inverse_gaussian.cpp
@@ -16,8 +16,13 @@
 //#include <pch.hpp> // include directory libs/math/src/tr1/ is needed.
 
-#include <boost/math/tools/test.hpp>
+#include <boost/math/tools/config.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // Boost.Test
 #include <boost/test/tools/floating_point_comparison.hpp>
@@ -26,7 +31,6 @@
 using boost::math::inverse_gaussian_distribution;
 using boost::math::inverse_gaussian;
 
-#include <boost/math/tools/test.hpp>
 #include "test_out_of_range.hpp"
 #include <iostream>
diff --git a/test/test_inverse_gaussian_cdf_double.cu b/test/test_inverse_gaussian_cdf_double.cu
new file mode 100644
index 0000000000..3224ff527b
--- /dev/null
+++ b/test/test_inverse_gaussian_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch inverse_gaussian distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(cdf(boost::math::inverse_gaussian_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_inverse_gaussian_cdf_float.cu b/test/test_inverse_gaussian_cdf_float.cu
new file mode 100644
index 0000000000..e2abb72dd1
--- /dev/null
+++ b/test/test_inverse_gaussian_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch inverse_gaussian distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(cdf(boost::math::inverse_gaussian_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_inverse_gaussian_cdf_nvrtc_double.cpp b/test/test_inverse_gaussian_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..1d677010fe
--- /dev/null
+++ b/test/test_inverse_gaussian_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_inverse_gaussian_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gaussian_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_inverse_gaussian_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gaussian_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::inverse_gaussian_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_inverse_gaussian_cdf_nvrtc_float.cpp b/test/test_inverse_gaussian_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..87e1537b89
--- /dev/null
+++ b/test/test_inverse_gaussian_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_inverse_gaussian_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gaussian_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_inverse_gaussian_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gaussian_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::inverse_gaussian_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_inverse_gaussian_pdf_double.cu b/test/test_inverse_gaussian_pdf_double.cu
new file mode 100644
index 0000000000..7f9128037b
--- /dev/null
+++ b/test/test_inverse_gaussian_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch inverse_gaussian distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(pdf(boost::math::inverse_gaussian_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_inverse_gaussian_pdf_float.cu b/test/test_inverse_gaussian_pdf_float.cu
new file mode 100644
index 0000000000..3795ff2dbb
--- /dev/null
+++ b/test/test_inverse_gaussian_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch inverse_gaussian distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(pdf(boost::math::inverse_gaussian_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_inverse_gaussian_pdf_nvrtc_double.cpp b/test/test_inverse_gaussian_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..e0f87de0ab
--- /dev/null
+++ b/test/test_inverse_gaussian_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_inverse_gaussian_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gaussian_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_inverse_gaussian_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gaussian_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::inverse_gaussian_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_inverse_gaussian_pdf_nvrtc_float.cpp b/test/test_inverse_gaussian_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..eb124deda9
--- /dev/null
+++ b/test/test_inverse_gaussian_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_inverse_gaussian_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gaussian_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_inverse_gaussian_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gaussian_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::inverse_gaussian_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_inverse_gaussian_quan_double.cu b/test/test_inverse_gaussian_quan_double.cu
new file mode 100644
index 0000000000..2727e53f49
--- /dev/null
+++ b/test/test_inverse_gaussian_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch inverse_gaussian distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::inverse_gaussian_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_inverse_gaussian_quan_float.cu b/test/test_inverse_gaussian_quan_float.cu
new file mode 100644
index 0000000000..2727e53f49
--- /dev/null
+++ b/test/test_inverse_gaussian_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch inverse_gaussian distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::inverse_gaussian_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_inverse_gaussian_quan_nvrtc_double.cpp b/test/test_inverse_gaussian_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..a72aab4be4
--- /dev/null
+++ b/test/test_inverse_gaussian_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_inverse_gaussian_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gaussian_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_inverse_gaussian_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gaussian_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::inverse_gaussian_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_inverse_gaussian_quan_nvrtc_float.cpp b/test/test_inverse_gaussian_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..550393f7e1
--- /dev/null
+++ b/test/test_inverse_gaussian_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_inverse_gaussian_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::inverse_gaussian_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gaussian_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_inverse_gaussian_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gaussian_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; 
++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::inverse_gaussian_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_jacobi_zeta.cpp b/test/test_jacobi_zeta.cpp index 77f33efb1e..c64f99580e 100644 --- a/test/test_jacobi_zeta.cpp +++ b/test/test_jacobi_zeta.cpp @@ -4,7 +4,10 @@ // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef SYCL_LANGUAGE_VERSION #include +#endif + #include "test_jacobi_zeta.hpp" // diff --git a/test/test_jacobi_zeta.hpp b/test/test_jacobi_zeta.hpp index 1aa72feb0d..a39d3ba709 100644 --- a/test/test_jacobi_zeta.hpp +++ b/test/test_jacobi_zeta.hpp @@ -8,11 +8,17 @@ // Constants are too big for float case, but this doesn't matter for test. #endif +#include + +#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS #include +#endif + #define BOOST_TEST_MAIN #include #include #include +#include #include //#include #include diff --git a/test/test_jacobi_zeta_double.cu b/test/test_jacobi_zeta_double.cu new file mode 100644 index 0000000000..8594da140b --- /dev/null +++ b/test/test_jacobi_zeta_double.cu @@ -0,0 +1,120 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/jacobi_zeta.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::jacobi_zeta(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::jacobi_zeta(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    int fail_counter = 0;
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 200)
+            {
+                std::cerr << "Result verification failed at element " << i << "!\n"
+                          << "Device: " << output_vector[i] << '\n'
+                          << "  Host: " << results[i] << '\n'
+                          << "   Eps: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl;
+                fail_counter++;
+                if (fail_counter > 100)
+                {
+                    break;
+                }
+            }
+        }
+    }
+
+    if (fail_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_jacobi_zeta_float.cu b/test/test_jacobi_zeta_float.cu
new file mode 100644
index 0000000000..7b473455ad
--- /dev/null
+++ b/test/test_jacobi_zeta_float.cu
@@ -0,0 +1,120 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::jacobi_zeta(in1[i], in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed input vector B + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = rand()/(float_type)RAND_MAX; + input_vector2[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::jacobi_zeta(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + // check the results + int fail_counter = 0; + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 200) + { + std::cerr << "Result verification failed at element " << i << "!\n" + << "Device: " << output_vector[i] << '\n' + << " Host: " << results[i] << '\n' + << " Eps: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl; + fail_counter++; + if (fail_counter > 100) + { + break; + } + } + } + } + + if (fail_counter > 0) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_jacobi_zeta_nvrtc_double.cpp b/test/test_jacobi_zeta_nvrtc_double.cpp new file mode 100644 index 0000000000..ded2e66571 --- /dev/null +++ b/test/test_jacobi_zeta_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_jacobi_zeta_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::jacobi_zeta(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_jacobi_zeta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_jacobi_zeta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_jacobi_zeta_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::jacobi_zeta(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_jacobi_zeta_nvrtc_float.cpp b/test/test_jacobi_zeta_nvrtc_float.cpp new file mode 100644 index 0000000000..de52da118d --- /dev/null +++ b/test/test_jacobi_zeta_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_jacobi_zeta_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::jacobi_zeta(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_jacobi_zeta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_jacobi_zeta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_jacobi_zeta_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::jacobi_zeta(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_landau.cpp b/test/test_landau.cpp
new file mode 100644
index 0000000000..c69c208177
--- /dev/null
+++ b/test/test_landau.cpp
@@ -0,0 +1,873 @@
+// Copyright Takuma Yoshimura 2024.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_TEST_MAIN
+#define BOOST_TEST_MODULE StatsLandauTest
+#include <boost/math/tools/config.hpp>
+#include <boost/test/included/unit_test.hpp>
+#include <boost/test/tools/floating_point_comparison.hpp>
+#include <boost/math/distributions/landau.hpp>
+
+#if __has_include(<stdfloat>)
+# include <stdfloat>
+#endif
+
+#ifdef __clang__
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Woverflow"
+#endif
+
+using boost::math::landau_distribution;
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+# include <boost/multiprecision/cpp_bin_float.hpp>
+  using boost::multiprecision::cpp_bin_float_quad;
+#endif
+
+template <class RealType, int N>
+void do_test_landau_pdf() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    landau_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    // Left tail of Landau distribution inherently limits accuracy due to the rapid decay of the function value.
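+    // Note on the scaled tolerances below: the pdf falls through thousands of
+    // orders of magnitude across the left tail (down to ~2.2e-2763 at x = -6.5),
+    // so the base tolerance is widened by multipliers (10000, 4000, 1000, 400,
+    // 100, 40, 10, 4) in step with the loss of attainable relative accuracy;
+    // from x = -2 onwards the plain 3-epsilon tolerance applies.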
+ BOOST_CHECK_CLOSE(pdf(dist, static_cast(-6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.23085262843901249987787600097675204323741277288698e-2763), tolerance * 10000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.43092685592935588160074717060232841559786193515624e-1259), tolerance * 10000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.07984079126853424748872672315932614660166330244758e-574), tolerance * 4000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.51902330649665702862232056199116883579782363388698e-261), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.61740160083645125413680224234682096739098348513281e-119), tolerance * 400); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.40268429420551752841762522647338593851384890728514e-54), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.64264014182893964260735316412669280242258553581563e-37), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.96628298286997713279446280099737948363623849272499e-25), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.65014695258589331535696024118503069088391324558285e-17), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.52577680004870416264378840612037346351517011876564e-11), tolerance * 40); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.96564073685328676511751663666044321538322085645615e-8), tolerance * 40); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.49195679110887105000100573294850045215882388658736e-5), tolerance * 10); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.83029281547111049679644135145824504175531296639784e-4), tolerance * 4); + + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.50763682207511020788551990942118742909694977020766e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55239934591810979484111502955882518822410677336301e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.11811339800999276275816189148400161297598608940370e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.43803188361205250108898134179433045737003842752098e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.44559645555354621908225839934967437642938589048641e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.19256762240252210606670736977933945194758525070962e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55747609478534232829752995639617852824070107389867e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.90787204474682320276094536207112207798402106183626e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.21762208692280384264052188465103527015057255360522e-1), tolerance); 
+ BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.35142856681038695962612277740850797545496160091346e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.46926317225475336949604234617889242016284403135263e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.57038182663311646092167022800387128596042855954974e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.65453539644067140109079490907422208395106068408806e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.72189928839617935958276513066615896148477769341984e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.77299781354951097794140321722510611592577477809046e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.80862887196928314574763226767834515262714169253341e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.82979296472337068393182796635835781531645972141685e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.83762914935252825962037404558307269544013453409236e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.83335939453844198706047097920194738266342072083183e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.81824188244607845783889480215304948211453087161311e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.79353314321623987234651193548815637401927993615201e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.76045845604337224786926223067521339118457783620200e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.72018967730828877761590993107231423707402335516208e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.67382951900254916684467526412940597211696659408252e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.62240126375351657025589608183516471314881294729647e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.56684293488331550048284390907274219670798777890274e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.50800501583089077855151395221936193088464543391697e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.44665091350810416315066697862911894015423062289459e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.38345947054726504652084392231776081590084152956389e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.31902894209756674501863785530002142859374765415539e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.25388195748200100617512573536171938322886836672500e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.4375)), 
BOOST_MATH_BIG_CONSTANT(RealType, N, 2.18847108191054606649972320910802001046885937665349e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.12318467675064265241298421202132871385510191955048e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.05835282809935995378486652717710487064456876565464e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.99425317296731752992133425091298094257535743484505e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.93111650117890312500196499480137441707124661561802e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.86913205029713491869481191910148340512494429155677e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.80845244177744590342641324845464181512725512270901e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.74919823041494123736512984399481253444800810061398e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.69146205716679200390505774997534008647397580964682e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.63531240868022603475813051802104652763421266792797e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.52794577355035866144175272867439043108097332289280e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.42728268648990888425150769069939371653574963683898e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.33330183140387812967271143318337448709293578674499e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.24583903791438503777896988292725453902854934482082e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.16463504238864475787300731138600083506835647657585e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.08937118376337417303260299595016407050621597377842e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.01969562586719050610986409853288944869103385155807e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.55242261334771588093967856464157010583515904813711e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.40542106335080455284302185631726193959498759868958e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.42466467030240081155174776845818529710340047591917e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.58484837369864336802514847217316987349368653299870e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.86394883380361824062302169684513676586056038951383e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.24319810137134739489567689474925534356381531907667e-2), 
tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.70679858898932226345344507218372775665959269901845e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.24154519527191616123631238437619039275267092832219e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.83643820409470770350079809236512802617514831283958e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.17158939997042257734107084716347218188987519860810e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.65588931293442756274955123891841674957134577995432e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.24999943998132461512162382109845468953569057653206e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.92615450486233381461674333853015589016995103903169e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.66449631910567440889641030798080985765446733145615e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.45062281189987454243698017666196791149621836621069e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(7.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.27394302531621603387264220385271200998719076373774e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(8)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.12656323880287532946687856443190592954884366662750e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(9)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.97272209098442387115881795542777687494895710641442e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(10)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.29822142754406367527698808542596968321101482611499e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(11)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.04211802337476854086853204096220197132228559528499e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(12)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.07803739765703855539464392586871287241424478552475e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(13)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.32333875346680119671101261202737313738117089411627e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(14)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.72232982334134875374745579629692504175963269123550e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(15)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.23647379895109194018623503694593516089449736564227e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(16)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.83847488747490686627461184914507143000166295798209e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(18)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.23229209720541068914767304360845920096605060550204e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(20)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.79947182256974676114073539538738666749318597550126e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(22)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.48016777321149926163568000853843271440982208135806e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(24)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
1.23815521917416110142990010737370397993296057042020e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(26)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.05051356188948975788809310742705170449199446295996e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(28)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.02189494175551818802120780043908400302499693753383e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(30)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.82977743390109357722237932712596740553146168838163e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(32)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.85767880395157523314894776472286059373130513345678e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(36)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.38436988071020010518465973524617926941561115862543e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(40)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.33733686912592492663864652100824543220296291087705e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(44)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.56719760886187032655497099894304037738414747694058e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(48)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.98455504950479524812911915761096343833926775180855e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(52)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.53330379508737569512814290970113441709685617438320e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(56)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.17681386666988585981678825113751906947724221061916e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(60)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.89036019000414403525553377301848537121144402020797e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(64)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.65677119200426097313111689557286038389044055520291e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(72)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.30262785101049126879812527584486523431435416855092e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(80)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.05075497824543686508583330151701088746046662556142e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(88)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.65309719784826735811404845249006631571011107397937e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(96)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.24862987881373408810690419771199915049078409763957e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(104)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.15969937273460908511004283053502890256890552857434e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(112)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.29851658065131948803621145305770537695198696080417e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(120)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.60580149897015438520712002341005415255634795262650e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(128)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.04036158768936643116910162586744782570492472991852e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(256)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.94016243990046849166189460556288227597025594512208e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(512)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.46086028677885635430916943188657821915692719236659e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1024)), 
BOOST_MATH_BIG_CONSTANT(RealType, N, 6.11684517285572531637079275580373275248746393740505e-7), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 11)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.52416040197318095218142515144159361915369773070323e-7), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 12)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.80328507092562833387828627033077037362079287147494e-8), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 13)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.49830894979328205589801770626380874036837651152185e-9), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 14)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.37321260457507890170942420030228551149027146711858e-9), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 15)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.93116689130330056188281287044879751157310863062584e-10), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 16)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.48253875064413349633513149673423379659751420343007e-10), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 17)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.70600576035028939688935887999924497768090063213495e-11), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 18)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.26455685393631638684357809746815117207931914505548e-12), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 19)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.31607812424720075650435345952742430841572532807506e-12), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 20)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.79011407780867471284567547634162699267979186606188e-13), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 21)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.44751775633854916204285667335984781835956253195982e-13), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 22)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.61878017561309094548695350627490496501579775589554e-14), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 23)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.04693171826032841875566733962302122545285900020005e-15), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 24)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.26173047049833308211942124367857471386868403241961e-15), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.65432295369693602811408648241147072923809751085578e-16), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 26)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41358031701599971473805281134574751063094181933632e-16), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 27)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.53395024254254681824332124678431347548990493315457e-17), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 28)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.83487488981301080647710951024578189621167614209000e-18), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 29)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.20871862925450931967259224559170596670053211110073e-18), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 30)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.52179645209930996243372289840247886946662593536877e-19), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 31)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
1.38044909732789001695763980339161333087125784369381e-19), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 32)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.45112272298940668060590155421055597940788628104792e-20), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 34)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.15695169189532579197171519055168026495351913764385e-21), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 36)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.34809480577241011630081955001585893790541249979882e-22), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 38)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.42559253331811413189334788330851699569177453819777e-24), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 40)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.26599533286729634727335420905860617977183492898697e-25), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 42)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.29124708296676552183969416435576047604811850887072e-26), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 44)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.05702942684184448841776719264034349959339461930208e-27), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 46)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28564339177412106630199552439532586497065103592483e-28), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 48)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.03527119858493090854162194048072858076706076597819e-30), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 50)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.02204449911503854915599875932648541933495423396290e-31), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 52)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.13877781194681051697029360385733987849026091896815e-32), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 54)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.96173613246674215642318845771797429014235991071697e-33), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 56)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.22608508279171150505713607335390403334626262247470e-34), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 58)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.66303176744819310534466860343470655112837733629379e-36), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 60)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.78939485465512007489594618239133015330653514470962e-37), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 62)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.99337178415944994713157754959412500112801769504923e-38), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 64)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.87085736509965620084544629248142254049400680133870e-39), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 68)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.30803658242053201409813080005840450554429514097447e-42), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 72)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.85470179000802031747569733233640023770585023325816e-44), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 76)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.11511788672188293650018950543585978468059122696737e-46), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 80)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.35592924500735522070031636324180548468184125270321e-49), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 84)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.70153486133099813308596977841944913997064236009234e-51), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 88)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.64662055207421145736704601944518511418678161667227e-54), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 92)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.59633615315398885053400175162079403845778513781261e-56), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 96)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.01419380982577689473984441890730791873276374028644e-58), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 100)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.96169456963194099507751725745105477863201113021124e-61), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 104)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.54753694126247695120215517859242605013890436588657e-63), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 108)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.04506617680655059063341866635141218965712131228669e-66), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 112)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.36135397531505882446617916654287979923104956976029e-68), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 116)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.22403896607444853307101236930796194301091622511538e-71), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 120)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.60314022112283145823086420676091852871339922342198e-73), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 124)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.40747664887610603837143133076598369654786648237564e-75), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 128)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.49795565967228921238840363580462378845964351238859e-78), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 136)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.38921456859174989683289128998508268228293005240118e-83), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 144)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28009255502193449353529225005875895420403010118164e-87), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 152)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.95326622775563734975477943429376061127322830738396e-92), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 160)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.98044773522283531151547154891015718272892496044375e-97), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 168)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.54780233035710954515910575700402402149799340844629e-102), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 176)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.93939564568650748467881127472537845077208466864701e-107), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 184)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.05886774378761405711041431804281287395814280222275e-111), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 192)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.61570395475404976976076403509950694878867004733696e-116), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 200)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.46536858330390895044061895004197227293193061422265e-121), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 208)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.76185391739488060064791709906306804341420076633095e-126), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 216)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.74013354094677826026598678445902716585418818104698e-131), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 224)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.75874868918880960123594174874729486977262600867765e-136), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 232)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.33647898699780419940733974437672346035348907603114e-140), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 240)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.03930509490631744294332846737170938164289714970571e-145), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 248)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.11173262772570410605366282252763272345412773087419e-150), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 256)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.74812717853653580635629703144475208046589314403411e-155), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_landau_cdf() {
+    //
+    // Basic sanity checks; tolerance is 3 epsilon,
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    landau_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    // The left tail of the Landau distribution inherently limits accuracy, due to the rapid decay of the function value.
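+    // Empirically, the reference values below fall off doubly exponentially
+    // (ln(cdf(x)) is close to -(2/(pi*e)) * exp(-pi*x/2) for x << 0), so even a
+    // one-ulp perturbation of x moves cdf(x) by many orders of magnitude; the
+    // tolerance multipliers are scaled up accordingly as x decreases.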
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.23104384186197610398535377648417544130503923472088e-2767), tolerance * 10000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.13839831060468958638389597955042905895011279005609e-1263), tolerance * 10000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.44312475898376675389279659203392726913179988221856e-577), tolerance * 4000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.60161744250590449897633679409320688863793657438650e-264), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.35715172357835600843326068305277508896293574359993e-122), tolerance * 400);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21485044777822987831938208827397125595048645106450e-56), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.20700596052912975894895710721207297741913643002012e-39), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.68977785355043338853968150291349097131276031028880e-27), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.08280503661515358192902976596786876511904900370188e-18), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.65792002575428625860974216754165889856915439054540e-13), tolerance * 40);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.10050102765608360408879765554028186586411956977646e-9), tolerance * 40);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.68815894016077213783105942021589578300879813020189e-7), tolerance * 10);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.37625644367407768681214121219960213300943983402353e-5), tolerance * 4);
+
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.07114056489178077422539043012078031612755414599729e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.02378089289225642153717146519785629105551593533471e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.86628849116890981757518380028315283124563329691336e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.01356086445894450901462536543096095003540865562528e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.87500433294484935737405475465845379363381335629325e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.14477362642888337899595114315256777323730741351157e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.86347714259952478386139532766899266845283140865196e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.03233833485353985646171581359634291113172343250638e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.61609610406317335842332400044553397267403259400925e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.10447280320452875363390566033897366825208362273976e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.25520504813750944299743577454809066282274647597463e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41278210604233003854999957016294448201095585229262e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.57614904197281705412681135912362432813551089393050e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.74424900339390561779123457277662541525802871208804e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.91604740159711318114657552509684829710278507526176e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.09055135109096066379824832581601920047332653592899e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.26682451996131584587603100587452166035163093744954e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.44399775009966844789856291613140149240683895719411e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.62127593214618528740100724985588598153581542828585e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.79794168130771479096847395825066083537476191027208e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.97335637253762835584202042505471763077481208909960e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.14695907126043189562824247063432944143645211079990e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.31826385110143246791390839018674857130873325623297e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.48685593270949525320895832911820568633181874504622e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.65238701512374797371944390202236332910560491002867e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.81457010849525523978613828303353744448250289768502e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.97317411782647886281236276761522493696113175680302e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.12801837383425412356107316013533713934091886254748e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.27896726012877622151287254340779424837571495657384e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.42592504593813016256394437035333320729928769135529e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.56883100040026405468976154826963286030986607276898e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.70765483748602800807348700535078666283426208025338e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.84239251923251354944511010208659148773285992261636e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.97306242841106189982720542754596269997825882200963e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.09970190928358551894563742001208165448751595738393e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.22236416601074576846725329522076603667792610069772e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.34111550192882688466795267571966376590108144590146e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.45603287874932276581779533145259002587664864932810e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.56720177227709322443036736236667681466823855576748e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.67471430008752600464561254217339060305933972234066e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.77866759641952347636729485475686950346612605081233e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.97630190201768046886571833364686445094779890812683e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.16093363694296970300414411298982724729800950998126e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.33340119589825831941744118804669746898228023705595e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.49453081078357569536385447775302397639174227456126e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.64512181956668895495628599815612188484872430606076e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.78593712196194817010501161391740217697424135219932e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.91769748027973587988050487247823119315646010788285e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.04107862044208783622223577234526499721208130809085e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.26517695571963502030771662018373539384937999898516e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.46273425405663094259919090350425049446731674380517e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.63758376080325935199241149369372014196158210794207e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.79296673358868394263845167623854238605663977145579e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.93161494811251760796024569455411548871678150466575e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.05582884526567009703468807923041602465523081207835e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.16754715036218405381848078882559199991501527210887e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.26840681332434061224397365001110201431714574161004e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.44288421192372625436336887550283154670590764164969e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.58804227080862094903927728732613232538756639404536e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.71029700052871041881284261613115047981162112317573e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.81440531622793612780308998937907666985394243647458e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.90394591759649482553720310181182407678855125658003e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.98164927634689821536641290454567745500263396394663e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(7.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.04962661826577677675750333226666504803112871241155e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(8)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.10953085258825170758957718650744990707007748273190e-1), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_landau_ccdf() {
+    //
+    // Basic sanity checks; tolerance is 3 epsilon,
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    landau_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-2))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.99292885943510821922577460956987921968387244585400e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.97976219107107743578462828534802143708944484064665e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.95133711508831090182424816199716847168754366703087e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.89864391355410554909853746345690390499645913443747e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.81249956670551506426259452453415462063661866437068e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.68552263735711166210040488568474322267626925864884e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.51365228574004752161386046723310073315471685913480e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.29676616651464601435382841864036570888682765674936e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.03839038959368266415766759995544660273259674059908e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.9375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.89552719679547124636609433966102633174791637726024e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.74479495186249055700256422545190933717725352402537e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.8125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.58721789395766996145000042983705551798904414770738e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.42385095802718294587318864087637567186448910606950e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.6875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.25575099660609438220876542722337458474197128791196e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.08395259840288681885342447490315170289721492473824e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.5625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.90944864890903933620175167418398079952667346407101e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.73317548003868415412396899412547833964836906255046e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.4375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.55600224990033155210143708386859850759316104280589e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.37872406785381471259899275014411401846418457171415e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.3125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.20205831869228520903152604174933916462523808972792e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.02664362746237164415797957494528236922518791090040e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.1875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.85304092873956810437175752936567055856354788920010e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.68173614889856753208609160981325142869126674376703e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.0625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.51314406729050474679104167088179431366818125495378e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.34761298487625202628055609797763667089439508997133e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.0625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.18542989150474476021386171696646255551749710231498e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.02682588217352113718763723238477506303886824319698e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.1875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.87198162616574587643892683986466286065908113745252e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.72103273987122377848712745659220575162428504342616e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.3125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.57407495406186983743605562964666679270071230864471e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.43116899959973594531023845173036713969013392723102e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.4375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.29234516251397199192651299464921333716573791974662e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.15760748076748645055488989791340851226714007738364e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.5625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.02693757158893810017279457245403730002174117799037e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.90029809071641448105436257998791834551248404261607e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.6875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.77763583398925423153274670477923396332207389930228e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.65888449807117311533204732428033623409891855409854e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.8125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.54396712125067723418220466854740997412335135067190e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.43279822772290677556963263763332318533176144423252e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.9375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.32528569991247399535438745782660939694066027765934e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.22133240358047652363270514524313049653387394918767e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.02369809798231953113428166635313554905220109187317e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.83906636305703029699585588701017275270199049001874e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.66659880410174168058255881195330253101771976294405e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.50546918921642430463614552224697602360825772543874e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.35487818043331104504371400184387811515127569393924e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.21406287803805182989498838608259782302575864780068e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.08230251972026412011949512752176880684353989211715e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.95892137955791216377776422765473500278791869190915e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.73482304428036497969228337981626460615062000101484e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.53726574594336905740080909649574950553268325619483e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.36241623919674064800758850630627985803841789205793e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.20703326641131605736154832376145761394336022854421e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.06838505188748239203975430544588451128321849533425e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.94417115473432990296531192076958397534476918792165e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.83245284963781594618151921117440800008498472789113e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(4))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.73159318667565938775602634998889798568285425838996e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(4.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55711578807627374563663112449716845329409235835031e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41195772919137905096072271267386767461243360595464e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(5.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28970299947128958118715738386884952018837887682427e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(6))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.18559468377206387219691001062092333014605756352542e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(6.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.09605408240350517446279689818817592321144874341997e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(7))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.01835072365310178463358709545432254499736603605337e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(7.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.50373381734223223242496667733334951968871287588448e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(8))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.90469147411748292410422813492550092929922517268105e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(9))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.89904178519375419646729692713027452766956304810423e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(10))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.08967063825624536726393015854367927398248532043928e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(11))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.42552960269528172637745151674852945361026757129572e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.87155991603325430279966781361705985837149401924358e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(13))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.40297179699419503759356245733027728020687579530781e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.00178775061898975362196226343808042189550214610720e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(15))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.65467961100606469227978737829290437667112287748795e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.35157264931262089761621934621402648953813567255367e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(18))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.84807181681993477724365847791930879377149510380933e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.44719141256297825765636022076703604777398281522436e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(22))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.12076533048217442252783357779404422496811288961954e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(24))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.84999992100002427148546326508663805148388380497970e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(26))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.62189540632143171631461472152513248987041904718627e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(28))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.42718378313424593748302632910098340183096121887744e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(30))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.25908549106503559691958010353777133320615316433912e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(32))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.11253031965493064317003259449214452744697000964364e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(36))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.86945568960396455137037115431688883900622129241921e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(40))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.67614509428664592918239573763993132586708338097148e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(44))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.51880213758692367036733937634308433186738240289411e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(48))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38828350661980875444222979837389265249992909781147e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(52))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.27829420024685114258602774435891775801837844690349e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(56))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.18436085060190590919501220038943005140750783789755e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(60))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.10321854418155548568175979136702978995567636390268e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(64))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.03242932255189471157625857327944947615628400066886e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(72))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.14901073563432515854150573798153430678589620957278e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(80))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.21304799594562916903235283213953478595851140543801e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(88))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.45021033455396813779806884621309261695988381836678e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(96))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.81662081970375351344432645398857600988792278643367e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(104))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.28205524656921301117582740567146809114392866561466e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(112))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.82502022536734166688286248837161828142031676980706e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(120))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.42981578145699797251974720633677573816663490439276e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(128))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.08470827750959570805565668839133674990776529552199e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(256))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.51875926400324844459822808369891291211052352126700e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(512))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.25243798184186193073508388843632555296804735961418e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1024))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.24222666597149382309278694960823422316591681354937e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 11))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.11546656864925721385400506911714668228522959074988e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55615661646764419866569094853675423840407090817980e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 13))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.77642694561457848096607089076550607777923417509897e-5), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.88702045347145575328162812652239839108265167027629e-5), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 15))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.94318591305792177162247519775866592794374293269769e-5), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.71505353250968890581644704744607604904036466331858e-6), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 17))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.85729142998688039060495209150963975787147384530557e-6), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 18))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.42858279647060361837158514176420066606677711840386e-6), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 19))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21427464712490151175367196840586966659512986509712e-6), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.07132880360935675845351982773327267915998066915668e-7), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 21))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.03565265516245957916479321487698564090018898313690e-7), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 22))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.51782323124974173858669998521646771346131838415381e-7), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 23))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.58910801622650317351735034158503382297981706223572e-8), tolerance);
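+    // Empirically, the complemented CDF has settled onto its right-tail
+    // asymptote (2/pi) / x by this point: each doubling of x halves the
+    // tail mass (e.g. at x = 2^20 the value 6.07132880e-7 matches
+    // (2/pi) * 2^-20 to four significant figures).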
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 24))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.79455187330704471571840926063516392410676855026673e-8), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.89727537800148613712401816975344760624742045931026e-8), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 26))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.48637543100068173667331269634864926724205314304352e-9), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 27))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.74318733515442703770226252397963326193722512832281e-9), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 28))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.37159356859216986241429458639296184020669300874754e-9), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 29))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.18579675857518154364748998529259019922036982534955e-9), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 30))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.92898372613704146131858586390099325449659727514527e-10), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 31))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.96449184577465200097456706940455493448424599625049e-10), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 32))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.48224591841157074706460083370955443589105978681308e-10), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 34))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.70561478725616554450782995120929832467409189826579e-11), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 36))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.26403696230051280181276990208319690098486010067856e-12), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 38))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.31600924018782655652761250713319794226217154394605e-12), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 40))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.79002310021356047142575775154740548981748412961642e-13), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 42))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.44750577503651834833347404492486283164453776697661e-13), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 44))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.61876443758020639017543150875739203546579850550506e-14), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 46))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.04691109394324465958560412889011979187196424398413e-15), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 48))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.26172777348533543325299108739056247374472601114618e-15), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 50))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.65431943371302795435372191153510741742082062236667e-16), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 52))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41357985842823674325840669958475387008031322630868e-16), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 54))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.53394964607057868542015717421467433254478008250729e-17), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 56))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.83487411517643815597510900540151314745726776276537e-18), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 58))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.20871852879410898385647064972035961074675685323943e-18), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 60))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.52179632198527209999982790078399565586115610945978e-19), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 62))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38044908049631800172983942388255378445601347506470e-19), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 64))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.45112270124079498928544180507695627740193986743475e-20), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 68))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.15695168827549686524997208584894563633442321098092e-21), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 72))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.34809480517218554065439979557746061380338130338956e-22), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 76))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.42559253232615962903750225978472507493251963237152e-24), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 80))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.26599533270384976814627294052855716278192786930198e-25), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 84))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.29124708293990610509133147452548575130220036516594e-26), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 88))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.05702942683744131568207851461222366485608388136168e-27), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 92))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28564339177340082230129892190826718627845694793237e-28), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 96))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.03527119858375513938311820075537974687844953537787e-30), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 100))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.02204449911484696211444887297771791655738720297691e-31), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 104))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.13877781194677935132153054550953911471083914854259e-32), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 108))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.96173613246673709457595659093933570122399892106597e-33), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 112))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.22608508279171068410997286933691737981629848926873e-34), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 116))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.66303176744819177568733043335566577797931906017599e-36), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 120))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.78939485465511985980458152084728836560995745045025e-37), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 124))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.99337178415944991237786345052955511752869068868957e-38), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 128))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.87085736509965619523616465658097194397480643905015e-39), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 136))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.30803658242053201264126818976942165494933536744296e-42), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 144))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.85470179000802031743799538662868033396263440343802e-44), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 152))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.11511788672188293649921694790182825545415091296783e-46), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 160))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.35592924500735522070006620274151662286777695301906e-49), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 168))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.70153486133099813308596336044590493080772537219152e-51), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 176))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.64662055207421145736704437674181613596767723512183e-54), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 184))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.59633615315398885053400170966477192811237391996946e-56), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 192))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.01419380982577689473984441783780153441889606248807e-58), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 200))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.96169456963194099507751725717891224382381274409403e-61), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 208))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.54753694126247695120215517858551259524367685316173e-63), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 216))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.04506617680655059063341866634965857517061270766301e-66), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 224))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.36135397531505882446617916654283538092602058893086e-68), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 232))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.22403896607444853307101236930795070674226792551118e-71), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 240))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.60314022112283145823086420676091824482119840840280e-73), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 248))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.40747664887610603837143133076598368938328062828235e-75), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 256))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.49795565967228921238840363580462378665343995422791e-78), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_landau_quantile_nearzero() {
+    //
+    // Basic sanity checks; tolerance is 3 epsilon,
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
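+    // Note that BOOST_CHECK_CLOSE interprets its tolerance as a percentage,
+    // so for double (epsilon ~ 2.22e-16) the figure printed above is ~6.7e-14.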
+    landau_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.03125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.37666142124664870427736212608782014348092232942703e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.16727296241754547290632950718657110865371011883488e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.09375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.01093317611810417245689707499918702758654056685650e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.77109518013577849065583862782160134594408514905936e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.15625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -7.55147864363163697663604936395052484592948168261254e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.39830737061008092918841929339491480637487863539219e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.21875)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.28068260002115642528019835718799250663117082682413e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.17764764050720242897742634974454174978089714315837e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.28125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.07332710058680816601395586920882835843791948709923e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.95448171270527198765810574858446724358448730774699e-1), tolerance);
+
+    // Relative accuracy decreases near the quantile's root (the value passes through zero), so the tolerance is relaxed here.
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.34375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.09096582988749397639311472333288802663134525410086e-2), tolerance * 10);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.74557416577759248536854968412795127813716796439183e-2), tolerance * 10);
+
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.40625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.60865269786540938863996714417993425933643390360701e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.90647080532071673100911236876323489196027799942628e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.46875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.28310640852292004320545764804480585932279888826464e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.75630143945078214396279308922575172688187402052064e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.53125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.34751649957416497391423738312790099231331963193670e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.08338732735341567163440035550389980881822036530441e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.59375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.09977916217503813825408682237719484284367467701518e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.31348919222343858173602105619413807214737739605647e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.65625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55537778016228176635489075791437025790508273018206e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.83358201486120130332744845371832465533289389791770e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.71875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.15968268411007811908706240304578653239207877720217e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.55081568282045925871949387822806899205587760602861e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.78125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.03356573722052416577951184486453626014326770622663e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.65170109428623528831428068089551728119930570543517e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.84375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.48307637103506875745328267812359172615004959210209e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.68160868054034088524891526884683024288168516466432e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.90625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.60235856635301171960591513042024596858455255485505e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.12983583770683756688253603663598582344177172936943e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.96875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.19714287107110229779819680655029619622254635434578e1), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_landau_quantile_lower() {
+    //
+    // Basic sanity checks; tolerance is 3 epsilon,
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    landau_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -3)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.77109518013577849065583862782160134594408514905936e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -4)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.16727296241754547290632950718657110865371011883488e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -5)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.37666142124664870427736212608782014348092232942703e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -6)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.53937945687640569563878953898367418895418286968892e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -7)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.67193057806636817209385515556513905369159305215132e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -8)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.78348038398799867332294266481364804262899249535026e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -10)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.96398712546539547900868300204740158406402253850740e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -12)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.10672834330580928387396530276881975373360375562766e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -14)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.22455234225296140347183338705883529117630663916504e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -16)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.32474749499506228416012679106564721359216355271060e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -20)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.48880222197195858123676152921728490355916489072920e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -24)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.62012266244208104039475013076613716084830146896364e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -28)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.72950770317262402197221324973838006547359087175177e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -32)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.82318656228158372073367735499500997206084370651970e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -40)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.97778747939490003780916437343292886429806927997697e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -48)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.10253941103391799610961378724535481625827036745191e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -56)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.20706965504048009454149883353144814083641366738928e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -64)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.29700011190686230364493911161520662289614047608888e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -80)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.44616534249558131319374859709618689440842790052762e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -96)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.56714397893479094507321490741760621098238204564655e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -112)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.66888826418737343076580628480419340009764391741662e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -128)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.75666995985336007747791649448887717847452667971341e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -160)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.90272386559500090814947503869007637264448398505091e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -192)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.02154690255519314578158695811138813197102888432118e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -224)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.12170243205222902851790251956181678240666087312573e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -256)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.20826069989721596260510558511263030379850705902299e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -320)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.35254674338923723387138280464388000015841039600555e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -384)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.47014796872616976009165833857464389146621180677518e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -448)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.56940486432189047495132801898571896454756873328884e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -512)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.65527239540648657446629479052874024150007745617425e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -640)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.79856256497052731627737920479094219620428520302497e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -768)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.91547732272180866354170476973276286391282782030491e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -896)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.01423042502756027207185677104980034013809957436307e0), tolerance);
+
+    // The test is terminated because p = 0 after this in fp64.
+    if (N <= 53) {
+        return;
+    }
+
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -1024)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.09971143249822249471944441552701750744803726485204e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -1280)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.24244558247619917128857214117595378487244291900164e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -1536)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.35897805692044314674029465486888992071638929853610e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -1792)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.45745119510512195777968048015384189694395480185084e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -2048)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.54271778755494231572464179212263712867955955556373e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -2560)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.68514415447058604503373799640504720298538164608415e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -3072)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.80146552760710346457341432134646648154958472048223e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -3584)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.89978436824284434538360953970051552537044157804319e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -4096)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.98493298246627952401490656857159297529934857258768e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -5120)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.12719032649420608124001447392893816734788637468278e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -6144)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.24339602515596353391422898038238569270302761832424e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -7168)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.34163046670299704746360546090336718985578875063595e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -8192)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.42671464308364892089984144203590287404801291341155e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -10240)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.56887983067842563164669431493815471028291272019257e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -12288)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.68502258391108555173104319503169367212234002338911e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -14336)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.78321117119788305968192558554365316820805283962570e0), tolerance);
+}
+
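+// These helpers are presumably instantiated once per floating-point type
+// elsewhere in the test suite (e.g. do_test_landau_quantile_lower<double, 53>());
+// N is the bit count BOOST_MATH_BIG_CONSTANT uses when deciding how to
+// materialise the reference constants, and it also gates the low-probability
+// section above that fp64 cannot reach.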
do_test_landau_quantile_upper() { + // + // Basic sanity checks, tolerance is either 3 epsilon + // expressed as a percentage: + // + BOOST_MATH_STD_USING + RealType tolerance = boost::math::tools::epsilon() * 100 * 3; + + std::cout << "Testing acurracy[%]: " << tolerance << std::endl; + + landau_distribution dist(static_cast(0), static_cast(1)); + + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.75630143945078214396279308922575172688187402052064e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.46875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.34751649957416497391423738312790099231331963193670e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.4375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.08338732735341567163440035550389980881822036530441e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.40625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.09977916217503813825408682237719484284367467701518e0), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.31348919222343858173602105619413807214737739605647e0), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.34375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55537778016228176635489075791437025790508273018206e0), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.3125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.83358201486120130332744845371832465533289389791770e0), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.28125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.15968268411007811908706240304578653239207877720217e0), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.55081568282045925871949387822806899205587760602861e0), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.21875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.03356573722052416577951184486453626014326770622663e0), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.1875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.65170109428623528831428068089551728119930570543517e0), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.15625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.48307637103506875745328267812359172615004959210209e0), tolerance); + + BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast(1), -3))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.68160868054034088524891526884683024288168516466432e0), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast(1), -4))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.12983583770683756688253603663598582344177172936943e1), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast(1), -5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.19714287107110229779819680655029619622254635434578e1), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast(1), -6))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.28089946846201448479186405990829330152038273940336e1), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast(1), -7))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.40066013407733304728124190173163568113870838023174e1), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast(1), -8))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.65941656579816517264930679482280545790703168918131e2), tolerance); + 
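+    // NB: the probe probabilities p = 2^-k are built with ldexp, so each one is exactly
+    // representable in binary floating point (e.g. 2^-16 is exactly 1/65536); the
+    // reference quantiles are presumably tabulated at much higher precision than any
+    // RealType under test.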
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -10))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.55753077855274500558851746639632584040672341383365e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.61233279175261252785344820794971528959731572291875e3), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.04359994051668217545134727183452999841836322543240e4), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.17280170760481128545941744972976378785527707571987e4), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.67552483213163052181906697701099131817014739251016e5), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -24))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.06807174647511043368265795578221114430723661526053e7), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -28))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.70891330693062153503879914631960898643362032945242e8), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -32))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.73426111586976542228936210986127280848086481329720e9), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -40))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.99970842207360156603062974124919047698394307869345e11), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -48))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.79192535600728707050158319385235151176991681631485e14), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -56))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.58732891137812933185516807516215074813520642490265e16), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -64))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.17435620131280049336696928005840807489266101470318e19), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -80))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.69626080092356929518666088104130567689669972090860e23), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -96))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.04382147849327037329330237397632338076523897478536e28), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -112))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.30551884414534967184149864106941373195910184715021e33), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -128))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.16630482969909636093804454941121895928783047320959e38), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -160))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.30420839672446839098151322191224148323438073311282e47), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -192))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.99612707791001852622513406287046677525461975485236e57), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -224))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.71632351102835736008910691332442626839731739200064e67), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -256))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.37155334922269019018360983857591746072979653277477e76), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_landau_locscale_param() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    landau_distribution<RealType> dist_0_1(static_cast<RealType>(0), static_cast<RealType>(1));
+    landau_distribution<RealType> dist_1_3(static_cast<RealType>(1), static_cast<RealType>(3));
+    landau_distribution<RealType> dist_0_pihalf(static_cast<RealType>(0), boost::math::constants::pi<RealType>() / 2);
+
+    BOOST_CHECK_CLOSE(entropy(dist_0_1), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.3726364400044818244844049010588577710), tolerance);
+    BOOST_CHECK_CLOSE(entropy(dist_1_3), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.3726364400044818244844049010588577710) + log(static_cast<RealType>(3)), tolerance);
+    BOOST_CHECK_CLOSE(entropy(dist_0_pihalf), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.8242191452939366892106001309537399145), tolerance);
+
+    BOOST_CHECK_CLOSE(median(dist_0_1), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.57563014394507821439627930892257517269), tolerance);
+    BOOST_CHECK_CLOSE(
+        median(dist_1_3),
+        (1 + 3 * (BOOST_MATH_BIG_CONSTANT(RealType, N, 0.57563014394507821439627930892257517269) + 2 / boost::math::constants::pi<RealType>() * log(static_cast<RealType>(3)))),
+        tolerance
+    );
+    BOOST_CHECK_CLOSE(median(dist_0_pihalf), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.3557804209908013250320928093906509105), tolerance);
+
+    BOOST_CHECK_CLOSE(mode(dist_0_1), BOOST_MATH_BIG_CONSTANT(RealType, N, -0.42931452986133525016556463510885028346), tolerance);
+    BOOST_CHECK_CLOSE(
+        mode(dist_1_3),
+        (1 + 3 * (BOOST_MATH_BIG_CONSTANT(RealType, N, -0.42931452986133525016556463510885028346) + 2 / boost::math::constants::pi<RealType>() * log(static_cast<RealType>(3)))),
+        tolerance
+    );
+    BOOST_CHECK_CLOSE(mode(dist_0_pihalf), BOOST_MATH_BIG_CONSTANT(RealType, N, -0.222782981256408504061824283124805665631673572953417648794046), tolerance);
+
+    BOOST_CHECK_CLOSE(pdf(dist_0_1, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.16353124086802260347581305180210465276342), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist_1_3, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.09034495984298569669047391552324642172044), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist_0_pihalf, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.14520663709640194253543670745173917702186), tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.57786675964195234763672948547568695034661), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.17119311431882309272302517476020685228892), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_0_pihalf, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.45101809281952585982591839302215356640746), tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, quantile(dist_0_1, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.25), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, quantile(dist_1_3, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.25), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_0_pihalf, quantile(dist_0_pihalf, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.25), tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, quantile(dist_0_1, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.75), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, quantile(dist_1_3, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.75), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_0_pihalf, quantile(dist_0_pihalf, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.75), tolerance);
+}
+
+BOOST_AUTO_TEST_CASE(landau_pdf_fp64)
+{
+    do_test_landau_pdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(landau_pdf_fp64_std)
+{
+    do_test_landau_pdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(landau_pdf_fp128)
+{
+    do_test_landau_pdf<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(landau_cdf_fp64)
+{
+    do_test_landau_cdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(landau_cdf_fp64_std)
+{
+    do_test_landau_cdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(landau_cdf_fp128)
+{
+    do_test_landau_cdf<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(landau_ccdf_fp64)
+{
+    do_test_landau_ccdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(landau_ccdf_fp64_std)
+{
+    do_test_landau_ccdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(landau_ccdf_fp128)
+{
+    do_test_landau_ccdf<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(landau_quantile_nearzero_fp64)
+{
+    do_test_landau_quantile_nearzero<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(landau_quantile_nearzero_fp64_std)
+{
+    do_test_landau_quantile_nearzero<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(landau_quantile_nearzero_fp128)
+{
+    do_test_landau_quantile_nearzero<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(landau_quantile_lower_fp64)
+{
+    do_test_landau_quantile_lower<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(landau_quantile_lower_fp64_std)
+{
+    do_test_landau_quantile_lower<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(landau_quantile_lower_fp128)
+{
+    do_test_landau_quantile_lower<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(landau_quantile_upper_fp64)
+{
+    do_test_landau_quantile_upper<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(landau_quantile_upper_fp64_std)
+{
+    do_test_landau_quantile_upper<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(landau_quantile_upper_fp128)
+{
+    do_test_landau_quantile_upper<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(landau_locscale_fp64)
+{
+    do_test_landau_locscale_param<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(landau_locscale_fp64_std)
+{
+    do_test_landau_locscale_param<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(landau_locscale_fp128)
+{
+    do_test_landau_locscale_param<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
diff --git a/test/test_landau_cdf_double.cu b/test/test_landau_cdf_double.cu
new file mode 100644
index 0000000000..40bff707d8
--- /dev/null
+++ b/test/test_landau_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/landau.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::landau_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the landau cdf CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch landau cdf kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(cdf(boost::math::landau_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_landau_cdf_float.cu b/test/test_landau_cdf_float.cu
new file mode 100644
index 0000000000..c4513c0844
--- /dev/null
+++ b/test/test_landau_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::landau_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::landau_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_landau_cdf_nvrtc_double.cpp b/test/test_landau_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..f23a758e12 --- /dev/null +++ b/test/test_landau_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_landau_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::landau_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_landau_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_landau_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_landau_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
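+            // Note: the kernel's second input parameter is unnamed and unused; h_in2/d_in2
+            // presumably exist only so all of these NVRTC tests can share one launch harness.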
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::landau_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_landau_cdf_nvrtc_float.cpp b/test/test_landau_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..1a339724e8 --- /dev/null +++ b/test/test_landau_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_landau_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::landau_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_landau_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_landau_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_landau_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] 
= static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::landau_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_landau_pdf_double.cu b/test/test_landau_pdf_double.cu new file mode 100644 index 0000000000..6ce3f5f784 --- /dev/null +++ b/test/test_landau_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::landau_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::landau_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_landau_pdf_float.cu b/test/test_landau_pdf_float.cu new file mode 100644 index 0000000000..5818ddf8a5 --- /dev/null +++ b/test/test_landau_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::landau_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::landau_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_landau_pdf_nvrtc_double.cpp b/test/test_landau_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..1e8df7d07e --- /dev/null +++ b/test/test_landau_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_landau_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::landau_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_landau_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_landau_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_landau_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
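+            // The fixed seed (42) keeps the host/device comparison below deterministic
+            // from run to run.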
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::landau_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_landau_pdf_nvrtc_float.cpp b/test/test_landau_pdf_nvrtc_float.cpp new file mode 100644 index 0000000000..1e8df7d07e --- /dev/null +++ b/test/test_landau_pdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <cstdlib>
+#include <boost/math/distributions/landau.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+
+// This is the float variant of the test, so float_type is float (the double
+// variant above is otherwise identical).
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/landau.hpp>
+extern "C" __global__
+void test_landau_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::landau_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_landau_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_landau_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_landau_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::landau_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_landau_quan_double.cu b/test/test_landau_quan_double.cu new file mode 100644 index 0000000000..4995bd49cf --- /dev/null +++ b/test/test_landau_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::landau_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::landau_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 5000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_landau_quan_float.cu b/test/test_landau_quan_float.cu new file mode 100644 index 0000000000..4995bd49cf --- /dev/null +++ b/test/test_landau_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/landau.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+// This is the float variant of the test, so float_type is float (the double
+// variant above is otherwise identical).
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::landau_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the landau quantile CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch landau quantile kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(quantile(boost::math::landau_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 5000.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_landau_quan_nvrtc_double.cpp b/test/test_landau_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..f4a5b95534
--- /dev/null
+++ b/test/test_landau_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_landau_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::landau_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_landau_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_landau_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_landau_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
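+            // Inputs land in [0, 1); quantile() overflows at the endpoints, which the
+            // ignore_error overflow policy above tolerates, and the verification loop
+            // below skips non-finite reference values.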
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::landau_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_landau_quan_nvrtc_float.cpp b/test/test_landau_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..4a10b497cf
--- /dev/null
+++ b/test/test_landau_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/landau.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/landau.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_landau_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::landau_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_landau_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_landau_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_landau_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
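+            // uniform draws in [0,1) are the probability arguments fed to quantile() below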
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::landau_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_laplace.cpp b/test/test_laplace.cpp
index df79c8a584..965c4c6575 100644
--- a/test/test_laplace.cpp
+++ b/test/test_laplace.cpp
@@ -1,7 +1,7 @@
 // Copyright Thijs van den Berg, 2008.
 // Copyright John Maddock 2008.
 // Copyright Paul A. Bristow 2008, 2009, 2014.
-
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -62,7 +62,7 @@ Test 8: test_extreme_function_arguments()
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include <boost/math/distributions/laplace.hpp>
 #include <boost/math/concepts/real_concept.hpp>
 #include "test_out_of_range.hpp"
diff --git a/test/test_laplace_cdf_double.cu b/test/test_laplace_cdf_double.cu
new file mode 100644
index 0000000000..ec3c83ecde
--- /dev/null
+++ b/test/test_laplace_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(cdf(boost::math::laplace_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_laplace_cdf_float.cu b/test/test_laplace_cdf_float.cu
new file mode 100644
index 0000000000..96acea2fda
--- /dev/null
+++ b/test/test_laplace_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(cdf(boost::math::laplace_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_laplace_cdf_nvrtc_double.cpp b/test/test_laplace_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..9d35b5862a
--- /dev/null
+++ b/test/test_laplace_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_laplace_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_laplace_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_laplace_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_laplace_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
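+            // the fixed seed (42) makes the host/device comparison reproducible run to run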
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::laplace_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_laplace_cdf_nvrtc_float.cpp b/test/test_laplace_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..aacf59cb07
--- /dev/null
+++ b/test/test_laplace_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_laplace_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_laplace_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_laplace_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_laplace_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
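+            // the static_cast is a no-op here (float_type is float) but mirrors the double variant of this test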
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::laplace_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_laplace_pdf_double.cu b/test/test_laplace_pdf_double.cu
new file mode 100644
index 0000000000..568be622b5
--- /dev/null
+++ b/test/test_laplace_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(pdf(boost::math::laplace_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_laplace_pdf_float.cu b/test/test_laplace_pdf_float.cu
new file mode 100644
index 0000000000..cb2aa67c11
--- /dev/null
+++ b/test/test_laplace_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(pdf(boost::math::laplace_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_laplace_pdf_nvrtc_double.cpp b/test/test_laplace_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..49c3864159
--- /dev/null
+++ b/test/test_laplace_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_laplace_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_laplace_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_laplace_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_laplace_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
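+            // inputs in [0,1) evaluate the Laplace PDF close to its mode at x == 0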
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::laplace_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_laplace_pdf_nvrtc_float.cpp b/test/test_laplace_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..2100e2cdcd
--- /dev/null
+++ b/test/test_laplace_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_laplace_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_laplace_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_laplace_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_laplace_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
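+            // 5000 elements keep the NVRTC smoke test quick; the plain CUDA variants use 50000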
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::laplace_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_laplace_quan_double.cu b/test/test_laplace_quan_double.cu
new file mode 100644
index 0000000000..ec3c83ecde
--- /dev/null
+++ b/test/test_laplace_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist(0, 1);
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::laplace_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_laplace_quan_float.cu b/test/test_laplace_quan_float.cu
new file mode 100644
index 0000000000..96acea2fda
--- /dev/null
+++ b/test/test_laplace_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist(0, 1);
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::laplace_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_laplace_quan_nvrtc_double.cpp b/test/test_laplace_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..cf02db5a53
--- /dev/null
+++ b/test/test_laplace_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_laplace_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_laplace_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_laplace_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_laplace_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
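+            // p == 0 maps to -inf under quantile(); the verification loop below skips
+            // non-finite results via boost::math::isfinite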
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::laplace_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_laplace_quan_nvrtc_float.cpp b/test/test_laplace_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..36472aaa25
--- /dev/null
+++ b/test/test_laplace_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <cstdlib>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_laplace_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_laplace_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_laplace_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_laplace_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::laplace_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_lgamma_double.cu b/test/test_lgamma_double.cu
new file mode 100644
index 0000000000..776ff5d271
--- /dev/null
+++ b/test/test_lgamma_double.cu
@@ -0,0 +1,102 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/lgamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::lgamma(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::lgamma(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_lgamma_float.cu b/test/test_lgamma_float.cu
new file mode 100644
index 0000000000..101037ab30
--- /dev/null
+++ b/test/test_lgamma_float.cu
@@ -0,0 +1,102 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/lgamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::lgamma(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::lgamma(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_log1p_double.cu b/test/test_log1p_double.cu
new file mode 100644
index 0000000000..d164b5a98d
--- /dev/null
+++ b/test/test_log1p_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/log1p.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::log1p(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::log1p(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_log1p_float.cu b/test/test_log1p_float.cu
new file mode 100644
index 0000000000..d164b5a98d
--- /dev/null
+++ b/test/test_log1p_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/log1p.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::log1p(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::log1p(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_log1p_nvrtc_double.cpp b/test/test_log1p_nvrtc_double.cpp
new file mode 100644
index 0000000000..36b0771b1b
--- /dev/null
+++ b/test/test_log1p_nvrtc_double.cpp
@@ -0,0 +1,186 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <boost/math/special_functions/log1p.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <cmath>
+#include <cstdlib>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/log1p.hpp>
+extern "C" __global__
+void test_log1p_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::log1p(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_log1p_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_log1p_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_log1p_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::log1p(h_in1[i]);
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_log1p_nvrtc_float.cpp b/test/test_log1p_nvrtc_float.cpp
new file mode 100644
index 0000000000..7194ffb56a
--- /dev/null
+++ b/test/test_log1p_nvrtc_float.cpp
@@ -0,0 +1,188 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/special_functions/log1p.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <cmath>
+#include <cstdlib>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/log1p.hpp>
+extern "C" __global__
+void test_log1p_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::log1p(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_log1p_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_log1p_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_log1p_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::log1p(h_in1[i]);
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_log1p_simple.cpp b/test/test_log1p_simple.cpp
new file mode 100644
index 0000000000..ef6c204d4c
--- /dev/null
+++ b/test/test_log1p_simple.cpp
@@ -0,0 +1,48 @@
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/log1p.hpp>
+#include <cmath>
+#include <random>
+#include "math_unit_test.hpp"
+
+constexpr int N = 50000;
+
+template <typename T>
+void test()
+{
+    std::mt19937_64 rng(42);
+    std::uniform_real_distribution<T> dist(0, 0.01);
+
+    for (int n = 0; n < N; ++n)
+    {
+        const T value (dist(rng));
+        CHECK_ULP_CLOSE(std::log1p(value), boost::math::log1p(value), 10);
+    }
+}
+
+template <typename T>
+void test_log1pmx()
+{
+    std::mt19937_64 rng(42);
+    std::uniform_real_distribution<T> dist(0, 0.01);
+
+    for (int n = 0; n < N; ++n)
+    {
+        const T value (dist(rng));
+        CHECK_ULP_CLOSE(std::log1p(value) - value, boost::math::log1pmx(value), 1e9);
+    }
+}
+
+int main()
+{
+    test<float>();
+    test<double>();
+
+    test_log1pmx<float>();
+    test_log1pmx<double>();
+
+    return boost::math::test::report_errors();
+}
diff --git a/test/test_logistic_cdf_double.cu b/test/test_logistic_cdf_double.cu
new file mode 100644
index 0000000000..6b4e850259
--- /dev/null
+++ b/test/test_logistic_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <exception>
+#include <vector>
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(cdf(boost::math::logistic_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_logistic_cdf_float.cu b/test/test_logistic_cdf_float.cu
new file mode 100644
index 0000000000..75b6ab0afd
--- /dev/null
+++ b/test/test_logistic_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <exception>
+#include <vector>
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(cdf(boost::math::logistic_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_logistic_cdf_nvrtc_double.cpp b/test/test_logistic_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..5548dc412d
--- /dev/null
+++ b/test/test_logistic_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <cstdlib>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_logistic_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_logistic_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_logistic_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_logistic_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::logistic_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_logistic_cdf_nvrtc_float.cpp b/test/test_logistic_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..ea65cc97cb
--- /dev/null
+++ b/test/test_logistic_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <cstdlib>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_logistic_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_logistic_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_logistic_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_logistic_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::logistic_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_logistic_dist.cpp b/test/test_logistic_dist.cpp
index 3bb092ce7e..c5114adbd4 100644
--- a/test/test_logistic_dist.cpp
+++ b/test/test_logistic_dist.cpp
@@ -1,6 +1,6 @@
 // Copyright 2008 Gautam Sewani
 // Copyright 2013 Paul A. Bristow
-
+// Copyright 2024 Matt Borland
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -12,11 +12,14 @@
 #endif
 
 #include <pch.hpp>
-#ifndef BOOST_NO_EXCEPTIONS
-#define BOOST_MATH_UNDERFLOW_ERROR_POLICY throw_on_error
-#define BOOST_MATH_OVERFLOW_ERROR_POLICY throw_on_error
+#include <boost/math/tools/config.hpp>
+
+#if !defined(BOOST_NO_EXCEPTIONS) && !defined(BOOST_MATH_NO_EXCEPTIONS)
+# define BOOST_MATH_UNDERFLOW_ERROR_POLICY throw_on_error
+# define BOOST_MATH_OVERFLOW_ERROR_POLICY throw_on_error
 #endif
-#include <boost/math/tools/test.hpp>
+
+#include "../include_private/boost/math/tools/test.hpp"
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
 #include <boost/math/distributions/logistic.hpp>
 using boost::math::logistic_distribution;
diff --git a/test/test_logistic_pdf_double.cu b/test/test_logistic_pdf_double.cu
new file mode 100644
index 0000000000..90232a2d6a
--- /dev/null
+++ b/test/test_logistic_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <exception>
+#include <vector>
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(pdf(boost::math::logistic_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_logistic_pdf_float.cu b/test/test_logistic_pdf_float.cu
new file mode 100644
index 0000000000..0a99ff9cf1
--- /dev/null
+++ b/test/test_logistic_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <exception>
+#include <vector>
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(pdf(boost::math::logistic_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_logistic_pdf_nvrtc_double.cpp b/test/test_logistic_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..d287225cf6
--- /dev/null
+++ b/test/test_logistic_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_logistic_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::logistic_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_logistic_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_logistic_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_logistic_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); 
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::logistic_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_logistic_pdf_nvrtc_float.cpp b/test/test_logistic_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..9339a6db36
--- /dev/null
+++ b/test/test_logistic_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/logistic.hpp>
+extern "C" __global__
+void test_logistic_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_logistic_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_logistic_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_logistic_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
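+            // Seeded identically (42) to the double variant, so both tests
+            // consume the same underlying random sequence.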
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::logistic_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_logistic_quan_double.cu b/test/test_logistic_quan_double.cu
new file mode 100644
index 0000000000..afe8a4c8cd
--- /dev/null
+++ b/test/test_logistic_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::logistic_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_logistic_quan_float.cu b/test/test_logistic_quan_float.cu
new file mode 100644
index 0000000000..92c371062f
--- /dev/null
+++ b/test/test_logistic_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::logistic_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_logistic_quan_nvrtc_double.cpp b/test/test_logistic_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..f763db82cc
--- /dev/null
+++ b/test/test_logistic_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/logistic.hpp>
+extern "C" __global__
+void test_logistic_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_logistic_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_logistic_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_logistic_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
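+            // Draws fall in [0, 1), the domain of quantile(); a draw of
+            // exactly 0 maps to an infinite quantile, which the verification
+            // loop below skips via boost::math::isfinite.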
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::logistic_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_logistic_quan_nvrtc_float.cpp b/test/test_logistic_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..b14c3c5daa
--- /dev/null
+++ b/test/test_logistic_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/logistic.hpp>
+extern "C" __global__
+void test_logistic_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_logistic_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_logistic_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_logistic_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::logistic_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_lognormal.cpp b/test/test_lognormal.cpp
index 759944b396..6fa4a6aa26 100644
--- a/test/test_lognormal.cpp
+++ b/test/test_lognormal.cpp
@@ -8,14 +8,19 @@
 // test_lognormal.cpp
 
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // Boost.Test
 #include <boost/test/tools/floating_point_comparison.hpp>
 
 #include <boost/math/distributions/lognormal.hpp>
    using boost::math::lognormal_distribution;
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include "test_out_of_range.hpp"
 
 #include <iostream>
diff --git a/test/test_lognormal_cdf_double.cu b/test/test_lognormal_cdf_double.cu
new file mode 100644
index 0000000000..288240a43c
--- /dev/null
+++ b/test/test_lognormal_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch lognormal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::lognormal_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_lognormal_cdf_float.cu b/test/test_lognormal_cdf_float.cu
new file mode 100644
index 0000000000..d9411a1b4a
--- /dev/null
+++ b/test/test_lognormal_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch lognormal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::lognormal_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_lognormal_cdf_nvrtc_double.cpp b/test/test_lognormal_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..0ab15206c9
--- /dev/null
+++ b/test/test_lognormal_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/lognormal.hpp>
+extern "C" __global__
+void test_lognormal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_lognormal_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_lognormal_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_lognormal_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
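+            // The lognormal distribution is supported on [0, inf), so every
+            // draw in [0, 1) is a valid argument to cdf().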
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::lognormal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_lognormal_cdf_nvrtc_float.cpp b/test/test_lognormal_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..308e8c85e3
--- /dev/null
+++ b/test/test_lognormal_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/lognormal.hpp>
+extern "C" __global__
+void test_lognormal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_lognormal_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_lognormal_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_lognormal_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::lognormal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_lognormal_pdf_double.cu b/test/test_lognormal_pdf_double.cu
new file mode 100644
index 0000000000..67bb63a2cd
--- /dev/null
+++ b/test/test_lognormal_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch lognormal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::lognormal_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_lognormal_pdf_float.cu b/test/test_lognormal_pdf_float.cu
new file mode 100644
index 0000000000..ac8382dfd1
--- /dev/null
+++ b/test/test_lognormal_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch lognormal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::lognormal_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_lognormal_pdf_nvrtc_double.cpp b/test/test_lognormal_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..1799559330
--- /dev/null
+++ b/test/test_lognormal_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/lognormal.hpp>
+extern "C" __global__
+void test_lognormal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_lognormal_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_lognormal_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_lognormal_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
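+            // pdf() of the lognormal is zero at x == 0, so even a draw of
+            // exactly 0 yields a finite result for the comparison below.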
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::lognormal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_lognormal_pdf_nvrtc_float.cpp b/test/test_lognormal_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..f66c8bf4bb
--- /dev/null
+++ b/test/test_lognormal_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_lognormal_pdf_nvrtc_float.cpp b/test/test_lognormal_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..f66c8bf4bb
--- /dev/null
+++ b/test/test_lognormal_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/lognormal.hpp>
+extern "C" __global__
+void test_lognormal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_lognormal_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_lognormal_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_lognormal_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
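+        // Integer ceiling division: with numElements = 5000 and blockSize = 256
+        // this gives (5000 + 255) / 256 = 20 blocks, i.e. 5120 threads in flight;
+        // the i < numElements guard in the kernel masks off the extra 120.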
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify results against the host implementation
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::lognormal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at element: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_lognormal_quan_double.cu b/test/test_lognormal_quan_double.cu
new file mode 100644
index 0000000000..056177e006
--- /dev/null
+++ b/test/test_lognormal_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
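+
+// NOTE: cuda_managed_ptr is a local test helper (cuda_managed_ptr.hpp); the
+// working assumption here is that it RAII-wraps cudaMallocManaged(), i.e.
+// unified memory visible to host and device alike -- which is why this test,
+// unlike the NVRTC ones, performs no explicit cudaMemcpy in either direction.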
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vector with probabilities in [0, 1)
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the lognormal quantile CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
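+
+    // Kernel launches are asynchronous: without the cudaDeviceSynchronize()
+    // call above, w.elapsed() would measure only the launch overhead, not the
+    // kernel's actual run time.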
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch lognormal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::lognormal_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_lognormal_quan_float.cu b/test/test_lognormal_quan_float.cu
new file mode 100644
index 0000000000..65a9188d77
--- /dev/null
+++ b/test/test_lognormal_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vector with probabilities in [0, 1)
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the lognormal quantile CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch lognormal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::lognormal_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_lognormal_quan_nvrtc_double.cpp b/test/test_lognormal_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..ddde58fc0b
--- /dev/null
+++ b/test/test_lognormal_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/lognormal.hpp>
+extern "C" __global__
+void test_lognormal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+)";
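+
+// The kernel string above is compiled at runtime by NVRTC. Boost.Math is
+// header-only, so the device code pulls in the very same headers as the host
+// build; NVRTC locates them through the --include-path option passed to
+// nvrtcCompileProgram() further down.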
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_lognormal_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_lognormal_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
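+
+        // The PTX produced here is portable intermediate code; the driver
+        // JIT-compiles it for the installed device when cuModuleLoadDataEx()
+        // runs below.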
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_lognormal_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
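+        // cuLaunchKernel() takes an array of pointers to the argument values,
+        // in the order of the kernel's parameter list -- the unused second
+        // input still needs its slot so that d_out and numElements line up.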
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify results against the host implementation
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::lognormal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at element: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_lognormal_quan_nvrtc_float.cpp b/test/test_lognormal_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..e121f3b380
--- /dev/null
+++ b/test/test_lognormal_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/lognormal.hpp>
+extern "C" __global__
+void test_lognormal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_lognormal_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_lognormal_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_lognormal_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
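+        // The draws below are uniform on [0, 1), i.e. valid probabilities for
+        // quantile(); the fixed seed (42) keeps any failure reproducible from
+        // run to run.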
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify results against the host implementation
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::lognormal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at element: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_mapairy.cpp b/test/test_mapairy.cpp
new file mode 100644
index 0000000000..ca3b415d76
--- /dev/null
+++ b/test/test_mapairy.cpp
@@ -0,0 +1,957 @@
+// Copyright Takuma Yoshimura 2024.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+#  pragma GCC diagnostic push
+#  pragma GCC diagnostic ignored "-Woverflow"
+#endif
+
+#define BOOST_TEST_MAIN
+#define BOOST_TEST_MODULE StatsMapAiryTest
+#include <boost/test/included/unit_test.hpp>
+#include <boost/test/tools/floating_point_comparison.hpp>
+#include <boost/math/distributions/mapairy.hpp>
+
+#include <boost/math/tools/big_constant.hpp>
+
+#if __has_include(<stdfloat>)
+#  include <stdfloat>
+#endif
+
+using boost::math::mapairy_distribution;
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+#include <boost/multiprecision/cpp_bin_float.hpp>
+using boost::multiprecision::cpp_bin_float_quad;
+#endif
+
+template <class RealType, int N>
+void do_test_mapairy_pdf(){
+    //
+    // Basic sanity checks; tolerance is 3 epsilon,
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    mapairy_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    // Left tail of MapAiry distribution inherently limits accuracy due to the rapid decay of the function value.
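+    // Concretely, the left tail falls off roughly like exp(-2|x|^3 / 27) (up to
+    // an algebraic prefactor), so by x = -52 the reference value is already of
+    // order 1e-4524 -- hence the widened tolerances on the deepest checks below.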
+ BOOST_CHECK_CLOSE(pdf(dist, static_cast(-52)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.48902525259161778456663373601227358266560067430936e-4524), tolerance * 10000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-48)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.35000261887613150163386664453709286782071190547944e-3558), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-44)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.59514743738055878289124765721120558977841779893312e-2741), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-40)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.23001358749520957481536587430803343649600724408224e-2059), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-36)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.91092696891623714443869745182216876382044809172723e-1501), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-32)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.07670398427978297595825050147335321586451776148320e-1054), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-30)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.75337241594382145623417946757631570270349308241803e-869), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-28)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.98271669611043473474221732621841457710581601420544e-707), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-26)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.16498740545788718692214006589654128484512724342869e-566), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-24)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.49681712247552092430360122510975673204884714780707e-445), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-22)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.55053727524864779623317373572781282350111182284876e-343), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-20)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.19590456107919919197512531538035620765530980528894e-258), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-18)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.73694941159984922185033097915349420991674156548250e-188), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-16)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.81449463353684665334966063303815524281822160159818e-132), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-15)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.74974309038596237884318081144949974855326622002492e-109), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-14)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.29136035353058491211769990284207292488489027379633e-89), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-13)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.01585226509378718416615473646349537501789989363229e-71), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-12)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.37033445628502141399361852883285362383866808742814e-56), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-11)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.34096664947790338152990789779350354526301978444227e-43), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-10)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.68877771535989487461767283800177475346958322976577e-33), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-9)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.82006696453724258217996541433935771573414304428384e-24), tolerance * 10); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-8)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
2.54482240843563125964619012431713490038548382437138e-17), tolerance * 10); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-7.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.95446089249867191439127373027848930673014745742088e-14), tolerance * 10); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.50933920988971071711086365057147867083089476835078e-12), tolerance * 10); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.93525639583557913222186453754518161964863338679333e-10), tolerance * 10); + + #ifndef BOOST_MATH_HAS_GPU_SUPPORT + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.34369873917936310843493426518284206307542912325702e-8), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.77757355685380561185988935905104213909281485602180e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.67935873875498302175093073530783568485155988744416e-5), tolerance); + #else + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.34369873917936310843493426518284206307542912325702e-8), tolerance*10); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.77757355685380561185988935905104213909281485602180e-6), tolerance*10); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.67935873875498302175093073530783568485155988744416e-5), tolerance*10); + #endif + + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.63150314678587224494834533488274597847689803128076e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.66981984951457236403520646020207894651500649740558e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.04268591430185285447948956767040247962753669747157e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.09373105986325625209096934627967360395341037677581e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.80640911685196781581446750934825559197579090806069e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.30714423198107251239523977966407204142168461729995e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.58842018103237928265884912263898874481595562824878e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.34619157601942089113215976120224270739528013289579e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.75682912797714827638251175725141830254079472660215e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.14483832832989822788477500521594411868042501441052e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.31592413389237158115773602554741285666601187136132e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.46510720433737601145589738058516645303764778406120e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
2.58858272589879457467907897007761708777068403816847e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.68368758744508533942159519264649183356982627934537e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.74896964572402100481706929301679979523752439534115e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.78416838013401425639828509948809230256694219667844e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.79011912859672711819203848701778087056321687064703e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.76859868856746781256050397658493368372419755176282e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.74830251162716136956231178987638601629634664517919e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.72213275423897601118140737012980725371274046892426e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.69048759814952957802002736887143054018790117979041e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.65378566438926186069214998647211849395648980215839e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.61245973171416933849635378258875498386938909727468e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.56695079986621470612303095505427278554320952987612e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.51770257100814033767420472605045312962380868746169e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.46515640532722451246144916562599607636382596292905e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.40974679016952551097229293394786875981693451661862e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.35189734654776422654774161240658760813142609084491e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.29201738275575132039254286232840244115188891902544e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.23049899232453590038819913858072064402640465533782e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.16771468279209758359211730425922824067563080471965e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.10401551277763824950435892543895916698505857289016e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.03972970763820126963564357339894388732551496850922e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.97516171847191855609649452292217911972760948598791e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.91059168530983397246791731314734572731339598377834e-1), 
tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.84627526286757244484016179988853282946752058004097e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.78244376604908808362842960720569088074360938826161e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.71930459233559687354598878881397561629142415087483e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.65704187907786028551352925788813828181432590627815e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.59581735536596915694662484173754591721771767208970e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.53577135041201613422320904399972678346505180911383e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.47702392309337834830220258805531647259773671641587e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41967608032785834866384133800904772318199442922830e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.36381105516273814949846883397888219175600292125024e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.30949561875080790965818583863955676147392613436720e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.25678140366749341891056123744605385205177726623077e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.20570621922063209990039201192595153753061823668443e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.15629534246005614282003528019134390685312636509026e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.10856277146400733535248998121026984388464547942987e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.06251243013238748252181151646220197947016365256337e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.75430585815861237530788049942964394941727146945119e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.94921730852144630624722291210866171228958719672719e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.20763203888240939750507494683136041919542710442553e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.52664687715163418854003417614711690443203995990429e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.90290701026305425949257079944431253582616228954748e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.33278922127799799766017618158404889945497518669482e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.81254742548053622180113078981721846479230408163441e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2)), 
BOOST_MATH_BIG_CONSTANT(RealType, N, 5.33842514891989443409465171800884519330814876941244e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.51394330031190703409753192401513286012367671189008e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.83173172055121251708819783948372658040389943407125e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.26731989005960717430587605328985374107046652534330e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.79973178629260510506067822703511722927370913396772e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.41138621807489585364575132430814179891782893106309e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.08776953835588218788258294328943072921113243548837e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.81702027546943420543987006667270192025143215867522e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.58950538583133457383574346194006716984204842764692e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.23449076237421124835496017947109115185689559096654e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.76628876438498186161275834549825153942245265285021e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.85705896089536983465980519032147649251164182381151e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.41725094843861993121315653435763289609915116903808e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.31264184292047732621870128909412110588268320306673e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.45166594303360764259552410935368398129332569193241e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(7.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.77078907451534652348813394683088320326083698637357e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(8)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.22517551525042172427941302520759668293351400085609e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(9)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.41981613066309674421788083490352250077859372362907e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(10)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.86815272152296425461960094149288322349331776254317e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(11)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.47672748088907799783338276475559452864086595323508e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(12)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.19065885055336418361425251181752339740309873019493e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(13)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.76274498770011155322885494753359606185503116800378e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(14)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.12117840379514058437502004817680010914158504614072e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, 
static_cast(15)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.84060120900265238260840046704576012273231257957218e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(16)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.82527663232857270992129793621400616909426228224712e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(18)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.34356222389517260476288187086191333487351565998487e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(20)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.33976274743353308671561884595553840397953308913098e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(22)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.63275420187782102031440426363742618331512228841827e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(24)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.11866107787483058586064754989935066085956111714438e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(26)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.73477843721978250663679288968512001020601996296490e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(28)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.44160680253022056601252575834792105383788377162879e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(30)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21335317476130403409545135558158224084771179668233e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(32)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.03264853379349880038687006045193401398742550912390e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(36)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.69348218890827802115199892879428047102258557679491e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(40)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.91237960791142062915636488977731277854393252956156e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(44)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.65910753386639565315739798326227665091960854776722e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(48)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.74840664437921635017204117045344765826413564522111e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(52)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.06868586281732253998273705361085730421392207516563e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(56)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.54975706173930172767473002839017573406030702226132e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(60)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.14583740785288043496760695039190976938266337954109e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(64)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.82612140613373038344483207825545571090342007879025e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(72)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.36036300551703042854867518460643847165588663407845e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(80)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.04535842750631349697889823754314492592824987833936e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(88)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.23732650061248767465314076860143478427557447053122e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(96)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.62699739226146233957434830649961751777393343487704e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(104)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.42516447612776691413186917108911633335528994759200e-6), tolerance); + 
BOOST_CHECK_CLOSE(pdf(dist, static_cast(112)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.50767100225890854218096000852153526257706640350006e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(120)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.79354212598752921854694346382153053933709178870365e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(128)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.22829848834607910587947957072698363475405049514861e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(256)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.70691063363569289518344600523741530561019748614806e-7), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(512)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.00884949274594535609095332297684714775260624951825e-7), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1024)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.78341094639115825398956813042728307580960455542744e-8), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 11)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.15265496830848341113806943808080759239756631409525e-9), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 12)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.57315927453204720744811655603371636021315489208197e-10), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 13)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.85204679078224624495444948682547526667095102524654e-11), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 14)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.74161227361870459284450362091832192677055853279409e-11), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 15)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.07876462219180661771716253718191722284989080940137e-12), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 16)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.44253835507444012610805302754717855328014016707006e-13), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 17)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.62113944435292862305033337145279742309373660330666e-14), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 18)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.70079323596084060715031479698836179832962876226040e-14), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 19)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.00660607636030744539992938919621601865776189138704e-15), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 20)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.31497886237763070924617652541844323609885629317601e-16), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 21)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.39564398862596160919395415621974601387332621717152e-17), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 22)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.66093089449300961525223017779109302708950139673730e-17), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 23)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.93613874644561300698599726959494859627890174964948e-18), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 24)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.19040904529065504857204743126112225011195501281143e-19), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.17543358264254064703206600829360293655988827362640e-20), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 26)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
1.62200282665332970268320245960144424095679956397013e-20), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 27)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.86732299457579395219850121616992176031753071031156e-21), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 28)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.06875883329165532088522436776488158979648490753308e-22), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 29)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.96038435804935610062036418083267399916819002834560e-23), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 30)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.58398713540364228777663367294171284220596445493878e-23), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 31)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.80012011189042378144386404030074611545380892760386e-24), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 32)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.94995979813638214930198027960375045149298281432705e-25), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 34)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.54686243691761942165686883762842249372631955082800e-26), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 36)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.83394511536756069267771511760113721074800848019252e-28), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 38)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.51060784855236271646178597425041551955920802058680e-29), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 40)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.72064952672613348894308116953255143520457854922274e-31), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 42)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.47520297710191671529471286547892233784021035466379e-32), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 44)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.61000930344348973529597770462163230645079307893137e-34), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 46)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.44062790732609054227999303269426009576929146861433e-35), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 48)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.50196221039403294462497822716956279927920276478346e-37), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 50)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.40686319074813529519530569599048837477475167906008e-38), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 52)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.39644747108792279748533029997027617117109903686087e-40), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 54)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.37388983471497587421416571874071130349096844921335e-41), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 56)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.29340573348429960691926787106472282340927640380120e-43), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 58)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.34168929171384362716227120970772588231539887618792e-44), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 60)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.19277903660576133488209753033664338223562148808726e-46), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 62)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.31024344893930041715065547823020105694863171502727e-47), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 64)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.09451077793531380359579836946937830296447410946021e-49), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 68)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.99854568157745488632402184518493974898874424751974e-52), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 72)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.90482976716548328742580258318841772362182055421850e-55), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 76)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.81331031949754227287676033514493918322443413497900e-58), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 80)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.72393585888431862585621126478997967111761145994043e-61), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 84)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.63665611219171740806270631327146452257579244134808e-64), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 88)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.55142198456222403131123663405416457282792230600398e-67), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 92)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.46818553179904690557737952544352009065226787695701e-70), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 96)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.38689993339750674372790969281593758852760534859083e-73), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 100)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.30751946620850267942178680939056405129648959823324e-76), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 104)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.22999947871924089787283868104547270634422812327464e-79), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 108)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.15429636593675868932894402445846943978928527663539e-82), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 112)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.08036754486011590754779689888522406229422390296425e-85), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 116)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.00817143052745694096464540906760162333420303023853e-88), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 120)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.93766741262446966891078653229257971028730764671731e-91), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 124)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.86881583264108366104568997294197237332744887374738e-94), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 128)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.80157796156355826273993161420114489582758679076892e-97), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 136)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.67179294735294176362984811229814996321448020054714e-103), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 144)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.54802031264585663187966166715445514985511798910822e-109), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 152)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.42998152985177672565427939143605723367225455199072e-115), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 160)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.31741097436120674672534884589772914283013777922699e-121), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 168)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.21005532680626558945212254133007921488775041506480e-127), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 176)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.10767300301195677705013517506606980789923707491379e-133), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 184)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.01003361035533597664845960146529179372714717379932e-139), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 192)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.91691742930921170868726692339448146221842496280605e-145), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 200)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.82811491900368853443838779773185869428484436302762e-151), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 208)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.74342624569290975040282039426027173450931965163004e-157), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 216)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.66266083306590056457788505006816075755054440653805e-163), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 224)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.58563693338956886728085045821014476542524758008771e-169), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 232)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.51218121851879965522847219296469189207577474602480e-175), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 240)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.44212838985328641436431140228718938071801638224106e-181), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 248)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.37532080636337891994887485722273767539788854812723e-187), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 256)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.31160812984788791651618467066072242297924856960987e-193), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_mapairy_cdf() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    mapairy_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    // Left tail of the MapAiry distribution inherently limits accuracy due to the rapid decay of the function value.
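A note on the tolerances used here: BOOST_CHECK_CLOSE takes its tolerance in percent, so epsilon<RealType>() * 100 * 3 means "three machine epsilons, expressed as a percentage". The relaxed multipliers on the deep left-tail checks below can be read off from the tail behaviour; as a rough sketch (assuming the light-tail asymptotics of an extremal stable law with alpha = 3/2, which is numerically consistent with the reference values that follow):

    F(x) \approx C(x)\, e^{-\frac{2}{27}\lvert x \rvert^{3}} \quad (x \to -\infty),
    \qquad
    \left\lvert \frac{d \ln F}{dx} \right\rvert \approx \frac{2}{9} x^{2},

so a relative perturbation eps of the argument moves ln F by roughly (2/9)|x|^3 * eps. At x = -52 that amplification factor is about 3e4, which matches the tolerance * 10000 on the first check.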
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-52)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41267677661910675305382637108866964194971791718950e-4526), tolerance * 10000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-48)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.54257458395442655204313046627840914088415076161558e-3561), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-44)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.76526114681351440657658256501506814774341295354922e-2743), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-40)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.27125192542763584917976723724200097302881304411026e-2062), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-36)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.63420353908123838151105660636393196705034472693701e-1504), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-32)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.73063520387773809387189485017024200015891995562535e-1057), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-30)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.87621737485296751690587747650586494006422187478425e-871), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-28)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.15431208733487028595846723973676765010037279135068e-709), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-26)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.43691228284359420616981379531226120901046405612148e-568), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-24)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.94968725712637201733674598027292284194096042437396e-447), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-22)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.29903053626343464859342616262182157628847143496274e-345), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-20)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.84047255379287327139957588241933355294847277196332e-260), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-18)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.79693358121995588307456046556605766916709047503732e-190), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-16)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.18430997009344263598977403572421080974427275299628e-134), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-15)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.48855014561480471542019201883561279219078606560987e-111), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-14)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21188569601559235190706821985880880180873775432009e-90), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-13)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.35130675597097791570481439579882762106711974084282e-73), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-12)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.37868088180310978639226395953519655144561270921646e-58), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-11)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.96213643222050268066173675729245650285594216946076e-45), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-10)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.54299664164424690506837069440249011982208536500223e-34), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-9)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55256887328573175640232009727759270424877561672436e-25), tolerance * 10);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-8)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.76658378173553845515260323095527685644595481193099e-18), tolerance * 10);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-7.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.53962634794926085939153812168733146892427038438756e-15), tolerance * 10);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.86644224834640735242565183060109673582728544145215e-13), tolerance * 10);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.03383983622833649608546095853620837892086215933298e-10), tolerance * 10);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.91537779419133030075704775849634109699414218730481e-9), tolerance * 10);
+
+    #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.98085764775551392183308859729746156105641469279537e-7), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.74005376900861403666100209810033834996576655180465e-6), tolerance);
+    #else
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.98085764775551392183308859729746156105641469279537e-7), tolerance * 10);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.74005376900861403666100209810033834996576655180465e-6), tolerance * 10);
+    #endif
+
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38210242545410097936371084112947842536958832190953e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.20388922732913226923590659641162717163243937441333e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.01190141003747964176020229987886598385662366685178e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.81314867410709227363443913418400543623685087660036e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.40335466769957406990026955582239481134043242080576e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.65057217408705944017729221733489193253150025104939e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.62249422314997336809404422537950480283670862386341e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.49463093988108215731880519187728405325669137953076e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.13731490830425435301322069056843269169838775278008e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.62598955251978523174755901843430986522046460911826e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.90499210557846036762061906872202100639194335579553e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.20405659651083834253744598353980626247223640040816e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.52019615753011340929652761968369152398962550184606e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.85001847038770817549042011993996310385733185736928e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.18987376977895959785022298515900161162070947963445e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.53600587820316569763364664186012515534206385253539e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.88469588484092929432583134493879046529813482615104e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.23238998449671083670041452413316011919930693856385e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.40482473418234444080637631800355214131813723348029e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.57580541347059341976751497432836530189703719631253e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.74497724348320019407539810613151808302214091800752e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.91201100390145330029548239186193815674350921221373e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.07660411254690680532566892253363954450731338298810e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.23848132271235424597841060643701240046967758423031e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.39739506217816326324441201522899717272689507530078e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.55312544188864070398568309403385234827691551635647e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.70547996523808722092270387778468756135745550977842e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.85429297088468989836573146625524898390641430572229e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.99942484305170440309173928292359597658678248461166e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.14076102348433999265595009037280955266566735857487e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.27821085871238004955876961091597273535850920355153e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.41170631513311055182782737833523982566081113132113e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.54120059278782917662600347641177702821526512639253e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.66666666666666666666666666666666666666666666666667e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.78809578204369927830516637473281842472755486735670e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.90549592781258492544813432997757039278246435622280e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.01889030914802776673544656107973638254340107333208e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.12831583813566709661955692651725308671816974125125e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.23382165835700226601867127791408227947098667962247e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.33546771684027578538449554398877013955180783305967e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.43332339433579641844979454704458465657719551526672e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.52746620257793576893322261719616438461321552900402e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.61798055507971738758762860873629690865112979477773e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.70495661608502783177600997939957544744070827966369e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.78848923058628458126731690124829998626066330325893e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.86867693680400430433267457602884639416039914905443e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.94562106121611865967124973894234486976489176337189e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.01942489511196041360944230423166502583801465481058e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.09019295071828335416784977458008517243666113680032e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.15803029418984060112492565010063896412665755747313e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.28533238809939722865056318522346970220432633092471e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.40216194581500702945839843944471675879234503695234e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.50932751104023843947651194694738668613497033313653e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.60760530927220029947644215642926175488027901722942e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.69773224168421631436187338078646751800825779589535e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.78040142637134263427912246171411479166776409276684e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.85625978983111842670865940159150361843054620140300e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.92590726602475875901684499549667744162908005254207e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.04873777288161758991649770533103505875566036067315e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.15278911856248478088063341213669352039898456875983e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.24130503839415290663064766600863877055306793445333e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.31696102735179394495180164200593784488582138327694e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.38195112746199551037885830108115610693678956104594e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.43806900885896605694390358343940460845582243989715e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.48677954836791634391273767979450406335892640585045e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.52927980046477119746205391036666872656611789316554e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.59939373386293535847669182576818465955927373035467e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.65433945362867879413538593557411941750837354200812e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.69816672594606569822299986511094581346810669802925e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.73368884642273321841166642302379087929797275815602e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.76289558464640301369422872101450331194067414657857e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.78721979671508698647720200666528542996105699216801e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(7.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.80771139145909061814002155447578879059651577452407e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(8)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.82515243555548699954994382507757214540918643206410e-1), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_mapairy_ccdf() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    mapairy_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-2))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.37401044748021476825244098156569013477953539088174e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.09500789442153963237938093127797899360805664420447e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.79594340348916165746255401646019373752776359959184e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.47980384246988659070347238031630847601037449815394e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.14998152961229182450957988006003689614266814263072e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.81012623022104040214977701484099838837929052036555e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.46399412179683430236635335813987484465793614746461e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.11530411515907070567416865506120953470186517384896e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.76761001550328916329958547586683988080069306143615e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.9375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.59517526581765555919362368199644785868186276651971e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.42419458652940658023248502567163469810296280368747e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.8125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.25502275651679980592460189386848191697785908199248e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.08798899609854669970451760813806184325649078778627e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.6875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.92339588745309319467433107746636045549268661701190e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.76151867728764575402158939356298759953032241576969e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.5625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.60260493782183673675558798477100282727310492469922e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.44687455811135929601431690596614765172308448364353e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.4375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.29452003476191277907729612221531243864254449022158e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.14570702911531010163426853374475101609358569427771e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.3125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.00057515694829559690826071707640402341321751538834e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.85923897651566000734404990962719044733433264142513e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.1875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.72178914128761995044123038908402726464149079644847e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.58829368486688944817217262166476017433918886867887e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.0625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.45879940721217082337399652358822297178473487360747e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.33333333333333333333333333333333333333333333333333e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.0625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.21190421795630072169483362526718157527244513264330e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.09450407218741507455186567002242960721753564377720e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.1875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.98110969085197223326455343892026361745659892666792e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.87168416186433290338044307348274691328183025874875e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.3125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.76617834164299773398132872208591772052901332037754e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.66453228315972421461550445601122986044819216694033e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.4375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.56667660566420358155020545295541534342280448473328e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.47253379742206423106677738280383561538678447099598e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.5625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.38201944492028261241237139126370309134887020522227e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.29504338391497216822399002060042455255929172033631e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.6875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.21151076941371541873268309875170001373933669674107e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.13132306319599569566732542397115360583960085094557e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.8125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.05437893878388134032875026105765513023510823662811e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.98057510488803958639055769576833497416198534518943e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.9375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.90980704928171664583215022541991482756333886319968e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.84196970581015939887507434989936103587334244252687e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.71466761190060277134943681477653029779567366907529e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.59783805418499297054160156055528324120765496304766e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.49067248895976156052348805305261331386502966686347e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.39239469072779970052355784357073824511972098277058e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.30226775831578368563812661921353248199174220410465e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21959857362865736572087753828588520833223590723316e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.14374021016888157329134059840849638156945379859700e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.07409273397524124098315500450332255837091994745793e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.51262227118382410083502294668964941244339639326852e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.47210881437515219119366587863306479601015431240172e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.58694961605847093369352333991361229446932065546671e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.83038972648206055048198357994062155114178616723061e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.18048872538004489621141698918843893063210438954059e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.61930991141033943056096416560595391544177560102847e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.13220451632083656087262320205495936641073594149555e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(4))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.70720199535228802537946089633331273433882106834460e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(4.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.00606266137064641523308174231815340440726269645335e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.45660546371321205864614064425880582491626457991880e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(5.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.01833274053934301777000134889054186531893301970748e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(6))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.66311153577266781588333576976209120702027241843979e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(6.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.37104415353596986305771278985496688059325853421426e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(7))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.12780203284913013522797993334714570038943007831995e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(7.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.92288608540909381859978445524211209403484225475927e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(8))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.74847564444513000450056174922427854590813567935902e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(9))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.46888494885704354461170804278826523218710845325600e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(10))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.25613324230091750631240813030902908673461241410450e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(11))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.08995384276796746727925033729177141093311272472945e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.57298412261448730944183018356841954457807894385431e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(13))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.49445667426015051170301411155410826055184233951431e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.60377136717752997401167765152949642869947243569953e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(15))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.85823720627621671017207210698248665551570440429385e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.22684103170563193014558918295924551172698239430713e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(18))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.22006950358287273393523413555812293504310335665744e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.45787594574479193246071067008198432980227781543963e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(22))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.86454025983790677529906306805195677315975322722262e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(24))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.39200041143645646258373331865031120883657317932172e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(26))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.00844464598329521611240330284790908003180945122524e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(28))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.69207105975977872414400493979073211647180301757125e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(30))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.42749222811426799055113000065152671289607878504149e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(32))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.20357145727036120652264700679701054983338793783565e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(36))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.84678186992189202260619598427428218285343162114238e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(40))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.57685005349633837416913019835892196933717853056275e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(44))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.36681189477727165566612776659042216546642387956225e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(48))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.19958501309529382285703116836639459985687034231775e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(52))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.06387729083711703707125561075502300925516821745411e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(56))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.51956726422504037060971384488455660747488001979504e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(60))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.58369730588727742464054792329377206933983130862391e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(64))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.79171138168694768852946218859426934990849762414603e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(72))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.52989549449674176739274234303186543768254475184699e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(80))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.57534022322054445175812334907544905864245720421642e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(88))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.83262692804541386527340439387556377914021496108680e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(96))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.24132027619974492546489695559384126869565934766815e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(104))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.76147662885863808608468523190827912794216909524203e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(112))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.36574864341415676513997111497521230889735520043816e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(120))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.03484906797876224319052497399923183994442800112441e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(128))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.75482620393356950171417803995157642981745916059418e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(256))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.73979922776430555363554632966080889507162029820752e-5), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(512))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.44353982639936003430898773389958569528340214901924e-5), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1024))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21747521599099094720054911524728128905331418743558e-5), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 11))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.30442492111514986227593224497576120783344940470632e-6), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.52184402609265962838062304205455964936542503385355e-6), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 13))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.38053115409151479601077504716814204054241984396579e-7), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.90230503273504174320645323659877052404533613091567e-7), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 15))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.72566394266708055503723315148592640827491725732841e-8), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.37788129092113064009710124961268854372530077851514e-8), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 17))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.40707992833487973793766069223107728401530584918012e-9), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 18))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.97235161365145877787702193738699569533619914113956e-9), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 19))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.05088499104186197709404571098840228587836479911802e-9), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.71543951706432436058369237052221627684485231071286e-10), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 21))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.31360623880232751062247585210413319083311528997997e-10), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 22))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.64429939633040546807800247387131067206560952703714e-11), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 23))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.64200779850290938904479244625064763530742033320990e-11), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 24))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.80537424541300683543633877614223483555123688801843e-12), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.05250974812863673632096512092113144155816177586613e-12), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 26))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.25671780676625854430204135462707277286762193086669e-13), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 27))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.56563718516079592040149887308711394838177046916383e-13), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 28))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.07089725845782318037768094883949095101477347530653e-14), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 29))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.20704648145099490050187930370138656919662848438144e-14), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 30))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.13386215730722789754721037105719349775366357126605e-14), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 31))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.00880810181374362562734924119592255004499293746830e-15), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 32))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41732769663403487193401296875220001924045479390391e-15), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 34))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.77165962079254358991751621103655291754760707304047e-16), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 36))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.21457452599067948739689526379757206282312287607911e-17), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 38))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.76821815748834935924611907974700181516735308796565e-18), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 40))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.46027269686043669905764884968375298647166107661462e-19), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 42))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.32534087107554587382206106210469124710349176992174e-20), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 44))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.40667608884443234227757632763086405915307399803018e-21), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 46))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.75834511105554042784697040953858007394668838202264e-22), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 48))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.44793138881942553480871301192322509243346488933465e-23), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 50))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.05599142360242819185108912649040313655418331509614e-23), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 52))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.31998927950303523981386140811300392069272914785317e-24), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 54))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.64998659937879404976732676014125490086591143489426e-25), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 56))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.06248324922349256220915845017656862608238929361934e-26), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 58))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.57810406152936570276144806272071078260298661702420e-27), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 60))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.22263007691170712845181007840088847825373327128025e-28), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 62))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.02828759613963391056476259800111059781716658910032e-29), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 64))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.03535949517454238820595324750138824727145823637540e-30), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 68))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.86774921121022248157180194922091913636165349433656e-32), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 72))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.22933581425159726274559405456576861505650835849009e-33), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 76))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.92083720976812072303999071025901346102579431014076e-35), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 80))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.00130814026268862974998548477970853285280360959494e-37), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 84))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.68954396916045098398435231996829458258250563999209e-39), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 88))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.32741245181320466247555049995046028528516506248764e-41), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 92))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.14490819559581322851180476561725941957580704101369e-42), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 96))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.78891905561845816954969494627696784308719850158390e-44), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 100))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.79518602440384088992139835355776225482374765872484e-46), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 104))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.36747816313100139050218492743400352316210571675756e-48), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 108))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.82418462989218967265966394911563050494079018243369e-50), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 112))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.06627884842065463635307249204931726639699846600526e-51), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 116))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.66606070065727286930167576882705822874531010313323e-53), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 120))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.60321984477698885828386838879227848241454703614566e-55), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 124))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.06753100746404509106854435748793512877272974397760e-57), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 128))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.35551719916257045479460055857489863870739022496500e-59), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 136))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55163994120179942744008802699582486296567144164185e-62), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 144))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.78818345019970563339865240965777554434978379307091e-66), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 152))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.24849475146412508153967873451605357507271433855203e-70), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 160))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.25793328893167116248527312854395839235173689906056e-73), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 168))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.51253244368083779903631134898427341882748266372206e-77), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 176))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.34583311613301704078034945043561362764342838469777e-80), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 184))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.28572538118412363471765002547757233311383882982854e-84), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 192))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.02179048140655184257238775751360432889120808063607e-88), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 200))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.95844494174964644594052435486171980685820509781154e-91), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 208))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.78135972106847276840948328823662062221241478957896e-95), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 216))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.16732415065148260947497150591714370659482782948705e-98), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 224))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.84991247717647121453850465311802662742877888058362e-102), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 232))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.95779413373161917611939612577643219587104218892485e-106), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 240))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.69868020843057108791977444476963676657007865940548e-109), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 248))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.14716847761369894511663682805087101213398110206416e-113), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 256))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.01249230410490696902261641309835718069677272999613e-116), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 600))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.71969536983273370677446521953393512020163902133418e-272), tolerance);
+
+    // The test is terminated here because x is inf beyond this point in fp64.
+    if (N <= 53) {
+        return;
+    }
+
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 10000))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41571262853590848961675783559267306723234880166889e-4516), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_mapairy_quantile_nearzero() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    mapairy_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.03125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.92947985498880164288717582971263069265106409267331e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.59822399410385083283727681965013516508738702183587e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.09375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.37051518906738394785544825118798831695789651589080e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.18765177572396470161180571018467021827812222940306e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.15625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.02990438632621036358966845969387883330987649629695e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.88799753980914934718534117636680696659950540760708e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.21875)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.75672646260182080486757173789442594487758358719568e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.63281240925531315038207673147576301428806562043208e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.28125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.51400400121400801502802449510679455584862826208973e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.39864166928988824635332108295073034590821798923232e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.34375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.28542241490194660184907680173328998444992309715534e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.17326074020471664204142312429732771994689840460362e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.40625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.06119903663447640834652691309939508582577539512003e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -9.48344198262277235851026749871350785648904607023049e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.46875)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.33817641975610807828267883212039917352738594297812e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, -7.16710685455022053317001962780672309444401429230030e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.53125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.96039220307812911673187933977720502663701213461352e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.70691924008504753346480857177819427470334982130948e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.59375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.39365150798529936746993802101957358628812489096556e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.00474815142578902619056852805926625285378475306847e-1), tolerance);
+
+    // Relative accuracy decreases near the root of the quantile (see the note after this function).
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.65625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.20300033010874226241969971846939668616524880409485e-2), tolerance * 4);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.08557609474704600193050942980969400222443967430903e-1), tolerance * 4);
+
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.71875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.84775355628419671941908878470333245751721020657513e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.81512108276093787175849069715334396559730936013748e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.78125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.05946829281923976565971908177998486051088750363345e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.69237131791870252926200620656937624841227431099908e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.84375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.29003620591891062966640208281844476036608475704263e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.70276979914029738186601698003670175695927342267323e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.90625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.28080064428173146950050714257245479404308412291534e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.22141517097499177560650456638736418866609207598601e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.96875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.36798741582712685566660459972731625067678977232527e0), tolerance);
+}
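The tolerance * 4 pair above reflects conditioning rather than an inaccurate implementation: the quantile changes sign between p = 0.65625 and p = 0.6875, so the reference values sit close to zero and a fixed absolute error becomes a much larger relative error. A minimal standalone sketch of the scaling (the error value below is hypothetical, chosen only to illustrate the effect):

    #include <cmath>
    #include <cstdio>

    int main() {
        // A fixed absolute error delta in a value y near zero produces a
        // relative error of delta / |y|, however small delta is in absolute terms.
        const double y     = -5.2030003301087423e-2; // quantile near its sign change (table above)
        const double delta = 1.0e-17;                // hypothetical absolute error
        std::printf("relative error ~ %g\n", delta / std::fabs(y)); // ~1.9e-16, about one double epsilon
        return 0;
    }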
ldexp(static_cast(1), -4)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.59822399410385083283727681965013516508738702183587e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -5)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.92947985498880164288717582971263069265106409267331e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -6)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.20998806047945704828514962310033638603842757237089e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -7)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.45496901537018787335070643452598202244805633093375e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -8)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.67354365380697578246790709817724830651682057476917e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -10)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.05331626220443158014611601879873776289585727270317e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -12)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.37844135506419820279451552495734835192576864639213e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -14)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.66475100891248273292011043832615771689316788780040e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -16)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.92187819510636694694450607724165688786106650410224e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -20)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.37198340489560108355199818136414022326129982959773e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -24)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.76028477470580699278679082649703786389435657070830e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -28)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.10419378761193307962534180728809528372838135595700e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -32)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.41443550638291131009585191506467027832121974024563e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -40)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.96026448032205844753104387192915744062974794241787e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -48)), BOOST_MATH_BIG_CONSTANT(RealType, N, -7.43355682497258196703130282301265363831865507562392e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -56)), BOOST_MATH_BIG_CONSTANT(RealType, N, -7.85432398223245647091315413807705550499215849013310e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -64)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.23500806363233607692361021471929015767129541901722e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -80)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.90695141954692987290075695228754376250679911499016e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -96)), BOOST_MATH_BIG_CONSTANT(RealType, N, -9.49149767947731938416802231743764792842977181401152e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -112)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.00124089585489463242206889945762289007601620222216e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -128)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.04845570631944023525776899386112795192930377369709e1), tolerance); + 
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -160)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.13196240444608571220864964743861585994042496495014e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -192)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.20475312075429969521072327133506584708675011147301e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -224)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.26971371983788322297191645356972895828456120573844e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -256)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.32865827226175697711590794217590458317537880046696e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -320)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.43303723297942256778502134022428247271630018451935e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -384)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.52413050129859916220402440029315858653640179700446e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -448)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.60549473918897820617071548963372947780704796096977e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -512)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.67937186583822375017526293948703697021001956377209e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -640)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.81028650954862232126230250634527453300745376729470e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -768)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.92461760606525027716536281516018610241283237089375e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -896)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.02678879167343824140492421290271637384390204302948e1), tolerance);
+
+    // The test is terminated here because p underflows to 0 beyond this point in fp64.
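+    // (The lower tail decays super-exponentially, roughly like exp(-c*|x|^3),
+    // which is why even p = 2^-896 maps to a quantile of only about -20. In
+    // IEEE double the smallest positive value is 2^-1074, so still smaller
+    // probabilities such as ldexp(1, -1280) evaluate to exactly 0, and the
+    // remaining checks are restricted to wider types.)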
+ if(N <= 53){ + return; + } + + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -1024)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.11959316095291435774375635827672516757012476284109e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -1280)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.28411538998117558460421271505353342631141242515516e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -1536)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.42785430825019465839030492595482468932343681278349e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -1792)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.55634220352411216642587067287258912926964865154135e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -2048)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.67307564006689676593687414536012112442665935238575e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -2560)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.88006705102088142572333576380332410442882971872985e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -3072)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.06095108081567569614908765234696115918183394966145e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -3584)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.22266885559263332213569359568928269416384800428618e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -4096)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.36960987939726803544369406181770745085696438921663e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -5120)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.63019918860979732881015320942243118103538614206846e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -6144)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.85794964449030059786959849735574335608745736630897e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -7168)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.06158611834421484993920289979452825015061756369038e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -8192)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.24662793339079714510108682543625432044226557042768e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -10240)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.57480988422877882295975852850886697641784458384426e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -12288)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.86165509604814747106458415059041118242367113233764e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -14336)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.11814205550106893579346476384206383922751551334008e1), tolerance); +} + +template +void do_test_mapairy_quantile_upper() { + // + // Basic sanity checks, tolerance is either 3 epsilon + // expressed as a percentage: + // + BOOST_MATH_STD_USING + RealType tolerance = boost::math::tools::epsilon() * 100 * 3; + + std::cout << "Testing acurracy[%]: " << tolerance << std::endl; + + mapairy_distribution dist(static_cast(0), static_cast(1)); + + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, -7.16710685455022053317001962780672309444401429230030e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.46875))), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.96039220307812911673187933977720502663701213461352e-1), tolerance); + 
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, -7.16710685455022053317001962780672309444401429230030e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.46875))), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.96039220307812911673187933977720502663701213461352e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.4375))), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.70691924008504753346480857177819427470334982130948e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.40625))), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.39365150798529936746993802101957358628812489096556e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.00474815142578902619056852805926625285378475306847e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.34375))), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.20300033010874226241969971846939668616524880409485e-2), tolerance * 4);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.3125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.08557609474704600193050942980969400222443967430903e-1), tolerance * 4);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.28125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.84775355628419671941908878470333245751721020657513e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.81512108276093787175849069715334396559730936013748e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.21875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.05946829281923976565971908177998486051088750363345e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.1875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.69237131791870252926200620656937624841227431099908e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.15625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.29003620591891062966640208281844476036608475704263e0), tolerance);
+
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -3))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.70276979914029738186601698003670175695927342267323e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -4))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.22141517097499177560650456638736418866609207598601e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.36798741582712685566660459972731625067678977232527e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -6))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.63240190107315016002742117235901647573374032928275e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -7))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.37487073802383509843362175744776155324007021797019e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -8))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.18429841828919685808654152263914024115105410242857e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -10))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.50552947051871870287466561935025391775332522082960e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38732922416208535732704088926085913683321415168727e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.49585416520688234273370911116680490544653394894587e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.80900106401310352569173769396359734191276945763559e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.59336704609415555521333325344685782416363311947039e3), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -24))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.55156669326461321628671444544286440554399508021258e4), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -28))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.25510428200953290831680861672648808196801287129080e5), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -32))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.43190196382396984755739962193081183260874381700500e6), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -40))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.77306696194440571351895040208958865072901805506645e7), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -48))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.32755474809804845725014907927003674268598932505496e9), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -56))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.38411271011676880227484089908793278086771148389202e10), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -64))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.78343716417988572841183668816998971166780683532365e12), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -80))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.14997210570212560225883973162628933329443239976683e15), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -96))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.99677152273063632531264438890163357898418349270434e18), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -112))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.62497388866235918645459765949830559011448320623360e22), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -128))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.64139290653028845793860203742403295803332310962125e25), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -160))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.97920970865877789202766313638336627695568626671453e31), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -192))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.84407885843159775798215826490261804734522750775796e38), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -224))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.87250989448760462117787200600339872607929758715096e44), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -256))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28743695332377477386366453958993754420344828061498e51), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -320))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.98819430202303731872899900564216427734599065613620e63), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -384))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.27507518735966284635753747136817320622994832637767e76), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -448))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.38092093738495853701408205401696240265896114717023e89), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -512))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.05852403143769157513875467957386926795647775801563e102), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -640))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.49075014586685014041688089060587827415714205982384e128), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -768))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.26604066065618724776472365999448082037897284653223e153), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -896))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.54152887582709267575372250070675971504362522185818e179), tolerance);
+}
+
+template <typename RealType, int N>
+void do_test_mapairy_locscale_param() {
+    //
+    // Basic sanity checks; tolerance is 3 epsilon,
+    // expressed as a percentage:
+    //
+
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    mapairy_distribution<RealType> dist_0_1(static_cast<RealType>(0), static_cast<RealType>(1));
+    mapairy_distribution<RealType> dist_1_3(static_cast<RealType>(1), static_cast<RealType>(3));
+    mapairy_distribution<RealType> dist_0_invcbrt18(static_cast<RealType>(0), 1 / cbrt(static_cast<RealType>(18)));
+
+    BOOST_CHECK_CLOSE(entropy(dist_0_1), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.0072768184106563460003025875575283708), tolerance);
+    BOOST_CHECK_CLOSE(entropy(dist_1_3), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.0072768184106563460003025875575283708) + log(static_cast<RealType>(3)), tolerance);
+    BOOST_CHECK_CLOSE(entropy(dist_0_invcbrt18), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.0438195657786014485977283891231190450), tolerance);
+
+    BOOST_CHECK_CLOSE(median(dist_0_1), BOOST_MATH_BIG_CONSTANT(RealType, N, -0.71671068545502205331700196278067230944440), tolerance);
+    BOOST_CHECK_CLOSE(
+        median(dist_1_3),
+        (1 + 3 * BOOST_MATH_BIG_CONSTANT(RealType, N, -0.71671068545502205331700196278067230944440)),
+        tolerance
+    );
+    BOOST_CHECK_CLOSE(median(dist_0_invcbrt18), BOOST_MATH_BIG_CONSTANT(RealType, N, -0.27347630981017495237228835747364595601553), tolerance);
+
+    BOOST_CHECK_CLOSE(mode(dist_0_1), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.1615872711359706852500000803029112987), tolerance);
+    BOOST_CHECK_CLOSE(
+        mode(dist_1_3),
+        (1 + 3 * BOOST_MATH_BIG_CONSTANT(RealType, N, -1.1615872711359706852500000803029112987)),
+        tolerance
+    );
+    BOOST_CHECK_CLOSE(mode(dist_0_invcbrt18), BOOST_MATH_BIG_CONSTANT(RealType, N, -0.4432284977460014720866292801600737435), tolerance);
+
+    BOOST_CHECK_EQUAL(mean(dist_0_1), static_cast<RealType>(0));
+    BOOST_CHECK_EQUAL(mean(dist_1_3), static_cast<RealType>(1));
+    BOOST_CHECK_EQUAL(mean(dist_0_invcbrt18), static_cast<RealType>(0));
+
+    BOOST_CHECK((boost::math::isinf)(variance(dist_0_1)));
+
+    BOOST_CHECK_CLOSE(pdf(dist_0_1, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.06251243013238748252181151646220197947016365256337e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist_1_3, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.97516171847191855609649452292217911972760948598791e-1) / 3, tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist_0_invcbrt18, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.29264884227495575949271548513687010027396013858107e-2), tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.15803029418984060112492565010063896412665755747313e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.66666666666666666666666666666666666666666666666667e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_0_invcbrt18, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.19730152884211135907661924151362266458717453254074e-1), tolerance);
+
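+    // Round-trip sanity checks: quantile() should invert cdf(), so
+    // cdf(dist, quantile(dist, p)) is expected to recover p within tolerance.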
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, quantile(dist_0_1, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.25), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, quantile(dist_1_3, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.25), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_0_invcbrt18, quantile(dist_0_invcbrt18, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.25), tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, quantile(dist_0_1, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.75), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, quantile(dist_1_3, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.75), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_0_invcbrt18, quantile(dist_0_invcbrt18, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.75), tolerance);
+}
+
+BOOST_AUTO_TEST_CASE(mapairy_pdf_fp64)
+{
+    do_test_mapairy_pdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(mapairy_pdf_std64)
+{
+    do_test_mapairy_pdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(mapairy_pdf_fp128)
+{
+    do_test_mapairy_pdf<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(mapairy_cdf_fp64)
+{
+    do_test_mapairy_cdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(mapairy_cdf_std64)
+{
+    do_test_mapairy_cdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(mapairy_cdf_fp128)
+{
+    do_test_mapairy_cdf<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(mapairy_ccdf_fp64)
+{
+    do_test_mapairy_ccdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(mapairy_ccdf_std64)
+{
+    do_test_mapairy_ccdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(mapairy_ccdf_fp128)
+{
+    do_test_mapairy_ccdf<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(mapairy_quantile_nearzero_fp64)
+{
+    do_test_mapairy_quantile_nearzero<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(mapairy_quantile_nearzero_std64)
+{
+    do_test_mapairy_quantile_nearzero<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(mapairy_quantile_nearzero_fp128)
+{
+    do_test_mapairy_quantile_nearzero<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(mapairy_quantile_lower_fp64)
+{
+    do_test_mapairy_quantile_lower<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(mapairy_quantile_lower_std64)
+{
+    do_test_mapairy_quantile_lower<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(mapairy_quantile_lower_fp128)
+{
+    do_test_mapairy_quantile_lower<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(mapairy_quantile_upper_fp64)
+{
+    do_test_mapairy_quantile_upper<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(mapairy_quantile_upper_std64)
+{
+    do_test_mapairy_quantile_upper<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(mapairy_quantile_upper_fp128)
+{
+    do_test_mapairy_quantile_upper<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(mapairy_locscale_fp64)
+{
+    do_test_mapairy_locscale_param<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(mapairy_locscale_std64)
+{
+    do_test_mapairy_locscale_param<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(mapairy_locscale_fp128)
+{
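+    // Quad-precision run; compiled out when GPU support is enabled
+    // (see the #ifndef BOOST_MATH_HAS_GPU_SUPPORT guard above).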
do_test_mapairy_locscale_param(); +} +#endif diff --git a/test/test_mapairy_cdf_double.cu b/test/test_mapairy_cdf_double.cu new file mode 100644 index 0000000000..7cb62a9343 --- /dev/null +++ b/test/test_mapairy_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::mapairy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::mapairy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_mapairy_cdf_float.cu b/test/test_mapairy_cdf_float.cu new file mode 100644 index 0000000000..b67c0ee933 --- /dev/null +++ b/test/test_mapairy_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. 
+// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::mapairy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::mapairy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_mapairy_cdf_nvrtc_double.cpp b/test/test_mapairy_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..87e7948b72 --- /dev/null +++ b/test/test_mapairy_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_mapairy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::mapairy_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_mapairy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_mapairy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_mapairy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
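+        // Note: the kernel above takes but never reads its second parameter,
+        // so h_in2 only exercises the host-to-device copy path.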
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::mapairy_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_mapairy_cdf_nvrtc_float.cpp b/test/test_mapairy_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..d84404c5e1 --- /dev/null +++ b/test/test_mapairy_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_mapairy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::mapairy_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_mapairy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_mapairy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_mapairy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::mapairy_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_mapairy_pdf_double.cu b/test/test_mapairy_pdf_double.cu new file mode 100644 index 0000000000..4ccd8b2f23 --- /dev/null +++ b/test/test_mapairy_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::mapairy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::mapairy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_mapairy_pdf_float.cu b/test/test_mapairy_pdf_float.cu new file mode 100644 index 0000000000..520ac9a68a --- /dev/null +++ b/test/test_mapairy_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::mapairy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::mapairy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_mapairy_pdf_nvrtc_double.cpp b/test/test_mapairy_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..5f461d6d13 --- /dev/null +++ b/test/test_mapairy_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_mapairy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::mapairy_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_mapairy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_mapairy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_mapairy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::mapairy_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_mapairy_pdf_nvrtc_float.cpp b/test/test_mapairy_pdf_nvrtc_float.cpp new file mode 100644 index 0000000000..39eb4152d0 --- /dev/null +++ b/test/test_mapairy_pdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_mapairy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::mapairy_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_mapairy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_mapairy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_mapairy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::mapairy_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_mapairy_quan_double.cu b/test/test_mapairy_quan_double.cu new file mode 100644 index 0000000000..3787000207 --- /dev/null +++ b/test/test_mapairy_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::mapairy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::mapairy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 5000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_mapairy_quan_float.cu b/test/test_mapairy_quan_float.cu new file mode 100644 index 0000000000..cd9d120070 --- /dev/null +++ b/test/test_mapairy_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <boost/math/distributions/mapairy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <iostream>
+#include <vector>
+#include <exception>
+#include <cstdlib>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::mapairy_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the quantile CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::mapairy_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 15000.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_mapairy_quan_nvrtc_double.cpp b/test/test_mapairy_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..43ac17c848
--- /dev/null
+++ b/test/test_mapairy_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/mapairy.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/mapairy.hpp>
+extern "C" __global__
+void test_mapairy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::mapairy_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_mapairy_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_mapairy_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_mapairy_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
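+            // Note: the kernel's second parameter is an unnamed const float_type*,
+            // so h_in2/d_in2 are never read on the device; presumably the unused
+            // buffer is kept so all of these NVRTC tests share one launch harness.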
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::mapairy_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_mapairy_quan_nvrtc_float.cpp b/test/test_mapairy_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..a127843e93
--- /dev/null
+++ b/test/test_mapairy_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_mapairy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::mapairy_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_mapairy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_mapairy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_mapairy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
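// Draws from (0, 1) are valid probability arguments for quantile(), so the
+            // device results should match the serial host loop below to within the
+            // 300 eps tolerance used in the verification step.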
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::mapairy_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_modf_double.cu b/test/test_modf_double.cu new file mode 100644 index 0000000000..06e65c1063 --- /dev/null +++ b/test/test_modf_double.cu @@ -0,0 +1,105 @@ + +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/modf.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    float_type fract;
+    int i_part;
+    long l_part;
+    long long ll_part;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::modf(in[i], &fract) + boost::math::modf(in[i], &i_part) + boost::math::modf(in[i], &l_part) + boost::math::modf(in[i], &ll_part);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    float_type fract;
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(4 * boost::math::modf(h_A[i], &fract));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_modf_float.cu b/test/test_modf_float.cu
new file mode 100644
index 0000000000..06e65c1063
--- /dev/null
+++ b/test/test_modf_float.cu
@@ -0,0 +1,105 @@
+
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/modf.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    float_type fract;
+    int i_part;
+    long l_part;
+    long long ll_part;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::modf(in[i], &fract) + boost::math::modf(in[i], &i_part) + boost::math::modf(in[i], &l_part) + boost::math::modf(in[i], &ll_part);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    float_type fract;
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(4 * boost::math::modf(h_A[i], &fract));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_modf_nvrtc_double.cpp b/test/test_modf_nvrtc_double.cpp
new file mode 100644
index 0000000000..f172dd52c1
--- /dev/null
+++ b/test/test_modf_nvrtc_double.cpp
@@ -0,0 +1,200 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/special_functions/modf.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/modf.hpp>
+extern "C" __global__
+void test_modf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    float_type fract;
+    int i_part;
+    long l_part;
+    long long ll_part;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::modf(in1[i], &fract) +
+                 boost::math::modf(in1[i], &i_part) +
+                 boost::math::modf(in1[i], &l_part) +
+                 boost::math::modf(in1[i], &ll_part);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_modf_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_modf_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_modf_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
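+        // Each boost::math::modf overload returns the fractional part of its input
+        // and stores the integral part through the pointer (float_type, int, long,
+        // or long long). For in1[i] = 3.25 every call yields 0.25, so the kernel's
+        // sum is 1.0 -- the same value the serial check below computes as
+        // 4 * boost::math::modf(h_in1[i], &fract).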
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            float_type fract;
+            const auto res = 4 * boost::math::modf(h_in1[i], &fract);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_modf_nvrtc_float.cpp b/test/test_modf_nvrtc_float.cpp
new file mode 100644
index 0000000000..1dcd3c0810
--- /dev/null
+++ b/test/test_modf_nvrtc_float.cpp
@@ -0,0 +1,200 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_modf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + float_type fract; + int i_part; + long l_part; + long long ll_part; + + if (i < numElements) + { + out[i] = boost::math::modf(in1[i], &fract) + + boost::math::modf(in1[i], &i_part) + + boost::math::modf(in1[i], &l_part) + + boost::math::modf(in1[i], &ll_part); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_modf_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_modf_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_modf_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + 
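// This float variant mirrors the double test above: draws from (0, 1000)
+        // keep every integral part well inside int/long/long long range, so all
+        // four modf overloads can store it without overflow.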
std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + float_type fract; + const auto res = 4 * boost::math::modf(h_in1[i], &fract); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." 
<< std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_beta.cpp b/test/test_nc_beta.cpp
index 3e7c08d0f3..105db62a3e 100644
--- a/test/test_nc_beta.cpp
+++ b/test/test_nc_beta.cpp
@@ -11,6 +11,9 @@
 // This must appear *before* any #includes, and precludes pch usage:
 // #define BOOST_MATH_ASSERT_UNDEFINED_POLICY false
+#ifndef BOOST_MATH_OVERFLOW_ERROR_POLICY
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#endif
 
 #ifdef _MSC_VER
 #pragma warning (disable:4127 4512)
@@ -27,7 +30,12 @@
 # define TEST_REAL_CONCEPT
 #endif
 
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #include <boost/math/distributions/chi_squared.hpp> // for chi_squared_distribution
 #include <boost/math/distributions/poisson.hpp> // for poisson_distribution
 #define BOOST_TEST_MAIN
@@ -41,6 +49,7 @@
 #include "test_ncbeta_hooks.hpp"
 #include "table_type.hpp"
 #include "test_nc_beta.hpp"
+#include "../include_private/boost/math/tools/test.hpp"
 #include <iostream>
 
 using std::cout;
diff --git a/test/test_nc_beta.hpp b/test/test_nc_beta.hpp
index 3ba983adca..39b4ede9eb 100644
--- a/test/test_nc_beta.hpp
+++ b/test/test_nc_beta.hpp
@@ -6,7 +6,9 @@
 #ifndef BOOST_MATH_OVERFLOW_ERROR_POLICY
 #define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
 #endif
-#include <boost/math/concepts/real_concept.hpp>
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
+#include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
 #include <boost/test/results_collector.hpp>
diff --git a/test/test_nc_beta_cdf_double.cu b/test/test_nc_beta_cdf_double.cu
new file mode 100644
index 0000000000..75073ac8d7
--- /dev/null
+++ b/test/test_nc_beta_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <boost/math/distributions/non_central_beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <iostream>
+#include <vector>
+#include <exception>
+#include <cstdlib>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CDF CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch non_central_beta distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_nc_beta_cdf_float.cu b/test/test_nc_beta_cdf_float.cu
new file mode 100644
index 0000000000..1088678c29
--- /dev/null
+++ b/test/test_nc_beta_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::non_central_beta_distribution(0.5, 0.5, 0.5), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch non_central_beta distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::non_central_beta_distribution(0.5, 0.5, 0.5), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_nc_beta_cdf_nvrtc_double.cpp b/test/test_nc_beta_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..4b9523fb22 --- /dev/null +++ b/test/test_nc_beta_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_non_central_beta_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::non_central_beta_distribution(0.5, 0.5, 0.5), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_beta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_non_central_beta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_beta_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::non_central_beta_distribution(0.5, 0.5, 0.5), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_nc_beta_cdf_nvrtc_float.cpp b/test/test_nc_beta_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..ec63159b41 --- /dev/null +++ b/test/test_nc_beta_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_non_central_beta_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::non_central_beta_distribution(0.5, 0.5, 0.5), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_beta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_non_central_beta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_beta_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::non_central_beta_distribution(0.5, 0.5, 0.5), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_nc_beta_pdf_double.cu b/test/test_nc_beta_pdf_double.cu new file mode 100644 index 0000000000..485cf1d77c --- /dev/null +++ b/test/test_nc_beta_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::non_central_beta_distribution(0.5, 0.5, 0.5), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch non_central_beta distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::non_central_beta_distribution(0.5, 0.5, 0.5), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_nc_beta_pdf_float.cu b/test/test_nc_beta_pdf_float.cu new file mode 100644 index 0000000000..bd989a330a --- /dev/null +++ b/test/test_nc_beta_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::non_central_beta_distribution(0.5, 0.5, 0.5), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch non_central_beta distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::non_central_beta_distribution(0.5, 0.5, 0.5), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_nc_beta_pdf_nvrtc_double.cpp b/test/test_nc_beta_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..64387cebc6 --- /dev/null +++ b/test/test_nc_beta_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_beta_pdf_nvrtc_double.cpp b/test/test_nc_beta_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..64387cebc6
--- /dev/null
+++ b/test/test_nc_beta_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/non_central_beta.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/non_central_beta.hpp>
+extern "C" __global__
+void test_non_central_beta_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
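+
+        // The NVRTC flow from here: build a program from the kernel source
+        // string, JIT-compile it to PTX, load the PTX as a module through the
+        // CUDA driver API, then look up the kernel entry point by name.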
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_beta_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_beta_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_beta_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
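+        // cuLaunchKernel receives kernel arguments as an array of pointers to
+        // each argument value (not the values themselves), so the device
+        // pointers and numElements are passed by address.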
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_beta_pdf_nvrtc_float.cpp b/test/test_nc_beta_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..de85a7ebc6
--- /dev/null
+++ b/test/test_nc_beta_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/non_central_beta.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/non_central_beta.hpp>
+extern "C" __global__
+void test_non_central_beta_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_beta_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_beta_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_beta_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_beta_quan_double.cu b/test/test_nc_beta_quan_double.cu
new file mode 100644
index 0000000000..eb1a872774
--- /dev/null
+++ b/test/test_nc_beta_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/non_central_beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
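+    // Ceiling division: the grid is rounded up so that a numElements that is
+    // not a multiple of threadsPerBlock is still fully covered, e.g. 50000
+    // elements at 256 threads/block -> 196 blocks (50176 threads); the
+    // i < numElements guard in the kernel discards the excess threads.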
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch non_central_beta distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_beta_quan_float.cu b/test/test_nc_beta_quan_float.cu
new file mode 100644
index 0000000000..f205e810c7
--- /dev/null
+++ b/test/test_nc_beta_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/non_central_beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
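+    // cudaDeviceSynchronize blocks until the kernel has finished, so the time
+    // reported below covers the whole run; launch failures are reported
+    // asynchronously, which is why cudaGetLastError is checked afterwards.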
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch non_central_beta distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_beta_quan_nvrtc_double.cpp b/test/test_nc_beta_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..61651d85e3
--- /dev/null
+++ b/test/test_nc_beta_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/non_central_beta.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/non_central_beta.hpp>
+extern "C" __global__
+void test_non_central_beta_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_beta_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_beta_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_beta_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_beta_quan_nvrtc_float.cpp b/test/test_nc_beta_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..3c9a0ce231
--- /dev/null
+++ b/test/test_nc_beta_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/non_central_beta.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/non_central_beta.hpp>
+extern "C" __global__
+void test_non_central_beta_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_beta_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_beta_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_beta_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_chi_squared.cpp b/test/test_nc_chi_squared.cpp
index 14bec61259..0d6f261fa3 100644
--- a/test/test_nc_chi_squared.cpp
+++ b/test/test_nc_chi_squared.cpp
@@ -7,7 +7,9 @@
 // (See accompanying file LICENSE_1_0.txt
 // or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch.hpp>
+#endif
 
 #ifdef _MSC_VER
 #pragma warning (disable:4127 4512)
@@ -24,8 +26,18 @@
 # define TEST_REAL_CONCEPT
 #endif
 
-#include <boost/math/tools/test.hpp>
+#ifndef BOOST_MATH_OVERFLOW_ERROR_POLICY
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#endif
+
+#include <boost/math/tools/config.hpp>
+
+#include "../include_private/boost/math/tools/test.hpp"
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #include <boost/math/distributions/non_central_chi_squared.hpp> // for chi_squared_distribution
 #include <boost/math/distributions/poisson.hpp> // for chi_squared_distribution
 #define BOOST_TEST_MAIN
diff --git a/test/test_nc_chi_squared.hpp b/test/test_nc_chi_squared.hpp
index b2fa6d75be..3ab276a5a0 100644
--- a/test/test_nc_chi_squared.hpp
+++ b/test/test_nc_chi_squared.hpp
@@ -80,7 +80,7 @@ void test_spot(
    boost::math::non_central_chi_squared_distribution<RealType> dist(df, ncp);
    BOOST_CHECK_CLOSE(
       cdf(dist, cs), P, tol);
-#ifndef BOOST_NO_EXCEPTIONS
+#if !defined(BOOST_NO_EXCEPTIONS) && !defined(BOOST_MATH_NO_EXCEPTIONS)
    try{
       BOOST_CHECK_CLOSE(
          pdf(dist, cs), naive_pdf(dist.degrees_of_freedom(), ncp, cs), tol * 150);
@@ -402,7 +402,7 @@ void quantile_sanity_check(T& data, const char* type_name, const char* test)
    // Sanity check mode, the accuracy of
    // the mode is at *best* the square root of the accuracy of the PDF:
    //
-#ifndef BOOST_NO_EXCEPTIONS
+#if !defined(BOOST_NO_EXCEPTIONS) && !defined(BOOST_MATH_NO_EXCEPTIONS)
    try{
       value_type m = mode(boost::math::non_central_chi_squared_distribution<value_type>(data[i][0], data[i][1]));
       value_type p = pdf(boost::math::non_central_chi_squared_distribution<value_type>(data[i][0], data[i][1]), m);
@@ -417,7 +417,7 @@ void quantile_sanity_check(T& data, const char* type_name, const char* test)
    // values to get back to the correct degrees of freedom or
    // non-centrality parameter:
    //
-#ifndef BOOST_NO_EXCEPTIONS
+#if !defined(BOOST_NO_EXCEPTIONS) && !defined(BOOST_MATH_NO_EXCEPTIONS)
    try{
 #endif
    if((data[i][3] < 0.99) && (data[i][3] != 0))
@@ -438,7 +438,7 @@ void quantile_sanity_check(T& data, const char* type_name, const char* test)
          boost::math::non_central_chi_squared_distribution<value_type>::find_non_centrality(boost::math::complement(data[i][0], data[i][2], data[i][4])),
          data[i][1], precision, i);
       }
-#ifndef BOOST_NO_EXCEPTIONS
+#if !defined(BOOST_NO_EXCEPTIONS) && !defined(BOOST_MATH_NO_EXCEPTIONS)
    }
    catch(const std::exception& e)
    {
diff --git a/test/test_nc_chi_squared_cdf_double.cu b/test/test_nc_chi_squared_cdf_double.cu
new file mode 100644
index 0000000000..64a442f6f8
--- /dev/null
+++ b/test/test_nc_chi_squared_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
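+// ignore_error stops the overflow policy from trying to throw: C++ exceptions
+// are not available in device code, so overflowing results come back as
+// infinities rather than via a thrown std::overflow_error.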
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch non_central_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_chi_squared_cdf_float.cu b/test/test_nc_chi_squared_cdf_float.cu
new file mode 100644
index 0000000000..8ac518adc8
--- /dev/null
+++ b/test/test_nc_chi_squared_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch non_central_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_chi_squared_cdf_nvrtc_double.cpp b/test/test_nc_chi_squared_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..05569b02eb
--- /dev/null
+++ b/test/test_nc_chi_squared_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+extern "C" __global__
+void test_non_central_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_chi_squared_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_chi_squared_kernel");
+
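+        // The two option sets below differ only in that the CI build pins the
+        // virtual architecture (compute_75) and uses the runner's checkout
+        // path; both point NVRTC at a Boost.Math include tree plus the CUDA
+        // headers, and the include paths are machine specific.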
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_chi_squared_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
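+        // h_in2/d_in2 are filled and copied even though the kernel ignores its
+        // unnamed second parameter; the extra argument keeps a single launch
+        // signature across all of these NVRTC tests.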
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_chi_squared_cdf_nvrtc_float.cpp b/test/test_nc_chi_squared_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..57964384bd
--- /dev/null
+++ b/test/test_nc_chi_squared_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+extern "C" __global__
+void test_non_central_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_chi_squared_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_chi_squared_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_chi_squared_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_chi_squared_pdf_double.cu b/test/test_nc_chi_squared_pdf_double.cu
new file mode 100644
index 0000000000..19a96944bd
--- /dev/null
+++ b/test/test_nc_chi_squared_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch non_central_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_chi_squared_pdf_float.cu b/test/test_nc_chi_squared_pdf_float.cu
new file mode 100644
index 0000000000..9433005b26
--- /dev/null
+++ b/test/test_nc_chi_squared_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch non_central_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_chi_squared_pdf_nvrtc_double.cpp b/test/test_nc_chi_squared_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..4ddede8281
--- /dev/null
+++ b/test/test_nc_chi_squared_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <cmath>
+#include <cstdint>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+extern "C" __global__
+void test_non_central_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_chi_squared_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_chi_squared_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_chi_squared_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_chi_squared_pdf_nvrtc_float.cpp b/test/test_nc_chi_squared_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..ec749b2a4b
--- /dev/null
+++ b/test/test_nc_chi_squared_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <cmath>
+#include <cstdint>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+extern "C" __global__
+void test_non_central_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_chi_squared_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_chi_squared_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_chi_squared_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_chi_squared_quan_double.cu b/test/test_nc_chi_squared_quan_double.cu
new file mode 100644
index 0000000000..a54a2e015c
--- /dev/null
+++ b/test/test_nc_chi_squared_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch non_central_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), input_vector1[i]));
+        double t = w.elapsed();
+        // Check the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_chi_squared_quan_float.cu b/test/test_nc_chi_squared_quan_float.cu
new file mode 100644
index 0000000000..7f83eb5c0b
--- /dev/null
+++ b/test/test_nc_chi_squared_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch non_central_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), input_vector1[i]));
+        double t = w.elapsed();
+        // Check the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_chi_squared_quan_nvrtc_double.cpp b/test/test_nc_chi_squared_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..f2ca532622
--- /dev/null
+++ b/test/test_nc_chi_squared_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <cmath>
+#include <cstdint>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+extern "C" __global__
+void test_non_central_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_chi_squared_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_chi_squared_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_chi_squared_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_chi_squared_quan_nvrtc_float.cpp b/test/test_nc_chi_squared_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..931a1aa60e
--- /dev/null
+++ b/test/test_nc_chi_squared_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <cmath>
+#include <cstdint>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+extern "C" __global__
+void test_non_central_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_chi_squared_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_chi_squared_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_chi_squared_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_f.cpp b/test/test_nc_f.cpp
index 1cf411516d..7de8b254d5 100644
--- a/test/test_nc_f.cpp
+++ b/test/test_nc_f.cpp
@@ -7,7 +7,9 @@
 // (See accompanying file LICENSE_1_0.txt
 // or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch.hpp>
+#endif
 
 #ifdef _MSC_VER
 #pragma warning (disable:4127 4512)
@@ -20,8 +22,12 @@
 # define TEST_REAL_CONCEPT
 #endif
 
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #include <boost/math/distributions/chi_squared.hpp> // for chi_squared_distribution
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // for test_main
diff --git a/test/test_nc_f_cdf_double.cu b/test/test_nc_f_cdf_double.cu
new file mode 100644
index 0000000000..a4e4c442ff
--- /dev/null
+++ b/test/test_nc_f_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch non_central_f distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(cdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), input_vector1[i]));
+        double t = w.elapsed();
+        // Check the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_f_cdf_float.cu b/test/test_nc_f_cdf_float.cu
new file mode 100644
index 0000000000..79a7d84144
--- /dev/null
+++ b/test/test_nc_f_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch non_central_f distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(cdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), input_vector1[i]));
+        double t = w.elapsed();
+        // Check the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_f_cdf_nvrtc_double.cpp b/test/test_nc_f_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..1f35ba2ec4
--- /dev/null
+++ b/test/test_nc_f_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/non_central_f.hpp>
+#include <cmath>
+#include <cstdint>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/non_central_f.hpp>
+extern "C" __global__
+void test_non_central_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_f_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_f_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_f_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_f_cdf_nvrtc_float.cpp b/test/test_nc_f_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..4aaa8b3910
--- /dev/null
+++ b/test/test_nc_f_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/non_central_f.hpp>
+#include <cmath>
+#include <cstdint>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/non_central_f.hpp>
+extern "C" __global__
+void test_non_central_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_f_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_f_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_f_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_f_pdf_double.cu b/test/test_nc_f_pdf_double.cu
new file mode 100644
index 0000000000..3a156ffb71
--- /dev/null
+++ b/test/test_nc_f_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch non_central_f distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(pdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), input_vector1[i]));
+        double t = w.elapsed();
+        // Check the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_f_pdf_float.cu b/test/test_nc_f_pdf_float.cu
new file mode 100644
index 0000000000..8a792b2f7e
--- /dev/null
+++ b/test/test_nc_f_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch non_central_f distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(pdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), input_vector1[i]));
+        double t = w.elapsed();
+        // Check the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_f_pdf_nvrtc_double.cpp b/test/test_nc_f_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..bb5da728e9
--- /dev/null
+++ b/test/test_nc_f_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/non_central_f.hpp>
+#include <cmath>
+#include <cstdint>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/non_central_f.hpp>
+extern "C" __global__
+void test_non_central_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_f_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_f_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_f_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_f_pdf_nvrtc_float.cpp b/test/test_nc_f_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..db46004729
--- /dev/null
+++ b/test/test_nc_f_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_non_central_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_f_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_f_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_f_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_f_quan_double.cu b/test/test_nc_f_quan_double.cu
new file mode 100644
index 0000000000..687f789616
--- /dev/null
+++ b/test/test_nc_f_quan_double.cu
@@ -0,0 +1,117 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch non_central_f distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    bool failed = false;
+    for(int i = 0; i < numElements; ++i)
+    {
+        // Nearly all values are within 150 eps, but there are outliers that hit ~100'000 eps.
+        // Typically this occurs around 0 on device with doubles;
+        // floats do not have this issue.
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100000.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+            failed = true;
+        }
+    }
+
+    if (failed)
+        return EXIT_FAILURE;
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_f_quan_float.cu b/test/test_nc_f_quan_float.cu
new file mode 100644
index 0000000000..e2cd182aed
--- /dev/null
+++ b/test/test_nc_f_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch non_central_f distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_f_quan_nvrtc_double.cpp b/test/test_nc_f_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..c2a6acd553
--- /dev/null
+++ b/test/test_nc_f_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_non_central_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_f_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_f_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_f_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
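+        // Grid geometry below rounds up: with numElements = 5000 and a block size
+        // of 256, numBlocks = (5000 + 255) / 256 = 20 blocks (5120 threads); the
+        // i < numElements guard in the kernel masks off the excess threads.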
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_f_quan_nvrtc_float.cpp b/test/test_nc_f_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..2e2aefc5e5
--- /dev/null
+++ b/test/test_nc_f_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_non_central_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_f_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_f_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_f_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_negative_binomial.cpp b/test/test_negative_binomial.cpp
index 069ebf8798..69f42b4a4b 100644
--- a/test/test_negative_binomial.cpp
+++ b/test/test_negative_binomial.cpp
@@ -26,9 +26,13 @@
 # define TEST_REAL_CONCEPT
 #endif
 
-#include <boost/math/concepts/real_concept.hpp> // for real_concept
+#include <boost/math/tools/config.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
 using ::boost::math::concepts::real_concept;
+#endif
 
 #include <boost/math/distributions/negative_binomial.hpp> // for negative_binomial_distribution
 using boost::math::negative_binomial_distribution;
diff --git a/test/test_negative_binomial_cdf_double.cu b/test/test_negative_binomial_cdf_double.cu
new file mode 100644
index 0000000000..6c4ae4e07d
--- /dev/null
+++ b/test/test_negative_binomial_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/negative_binomial.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
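+    // The <<<...>>> launch is asynchronous; synchronizing here ensures the elapsed
+    // time below covers the kernel itself and that cudaGetLastError reports any
+    // launch failure.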
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch negative_binomial distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_negative_binomial_cdf_float.cu b/test/test_negative_binomial_cdf_float.cu
new file mode 100644
index 0000000000..0f5849474b
--- /dev/null
+++ b/test/test_negative_binomial_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/negative_binomial.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch negative_binomial distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_negative_binomial_cdf_nvrtc_double.cpp b/test/test_negative_binomial_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..1cb7af6663
--- /dev/null
+++ b/test/test_negative_binomial_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/negative_binomial.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/negative_binomial.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_negative_binomial_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_negative_binomial_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_negative_binomial_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
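+        // The two option sets differ only in the include path: the CI branch points
+        // at the GitHub runner's boost-root checkout, the other at a local
+        // development tree; compute_75 (Turing) is pinned only for the CI run.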
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_negative_binomial_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_negative_binomial_cdf_nvrtc_float.cpp b/test/test_negative_binomial_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..f0d1b16476
--- /dev/null
+++ b/test/test_negative_binomial_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/negative_binomial.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/negative_binomial.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_negative_binomial_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_negative_binomial_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_negative_binomial_kernel");
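+        // The kernel is declared extern "C" in the source string above, so its
+        // symbol is unmangled and cuModuleGetFunction can look it up by name below.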
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_negative_binomial_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_negative_binomial_pdf_double.cu b/test/test_negative_binomial_pdf_double.cu
new file mode 100644
index 0000000000..16bd2ee487
--- /dev/null
+++ b/test/test_negative_binomial_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/negative_binomial.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch negative_binomial distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_negative_binomial_pdf_float.cu b/test/test_negative_binomial_pdf_float.cu
new file mode 100644
index 0000000000..d9965c3050
--- /dev/null
+++ b/test/test_negative_binomial_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/negative_binomial.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch negative_binomial distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_negative_binomial_pdf_nvrtc_double.cpp b/test/test_negative_binomial_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..395f9a30e5
--- /dev/null
+++ b/test/test_negative_binomial_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/negative_binomial.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/negative_binomial.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_negative_binomial_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_negative_binomial_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_negative_binomial_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_negative_binomial_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
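+        // args holds the address of each kernel parameter; the driver reads the
+        // pointed-to values at launch, so these variables must remain alive until
+        // cuLaunchKernel returns.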
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_negative_binomial_pdf_nvrtc_float.cpp b/test/test_negative_binomial_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..ad20351fcd
--- /dev/null
+++ b/test/test_negative_binomial_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_negative_binomial_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::negative_binomial_distribution(1, 0.5), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_negative_binomial_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_negative_binomial_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_negative_binomial_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_negative_binomial_quan_double.cu b/test/test_negative_binomial_quan_double.cu
new file mode 100644
index 0000000000..2ecabc86c7
--- /dev/null
+++ b/test/test_negative_binomial_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::negative_binomial_distribution(1, 0.5), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch negative_binomial distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::negative_binomial_distribution(1, 0.5), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_negative_binomial_quan_float.cu b/test/test_negative_binomial_quan_float.cu new file mode 100644 index 0000000000..e9a3aece3a --- /dev/null +++ b/test/test_negative_binomial_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::negative_binomial_distribution(1, 0.5), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch negative_binomial distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::negative_binomial_distribution(1, 0.5), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_negative_binomial_quan_nvrtc_double.cpp b/test/test_negative_binomial_quan_nvrtc_double.cpp new file mode 100644 index 0000000000..c63342ba66 --- /dev/null +++ b/test/test_negative_binomial_quan_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_negative_binomial_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::negative_binomial_distribution(1, 0.5), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_negative_binomial_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_negative_binomial_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_negative_binomial_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::negative_binomial_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_negative_binomial_quan_nvrtc_float.cpp b/test/test_negative_binomial_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..6d7e98839f
--- /dev/null
+++ b/test/test_negative_binomial_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_negative_binomial_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::negative_binomial_distribution(1, 0.5), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_negative_binomial_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_negative_binomial_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_negative_binomial_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::negative_binomial_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nonfinite_io.cpp b/test/test_nonfinite_io.cpp
index b917549810..855843c715 100644
--- a/test/test_nonfinite_io.cpp
+++ b/test/test_nonfinite_io.cpp
@@ -14,8 +14,8 @@
 #define BOOST_TEST_MAIN
 #include
-#include // Similar to BOOST_CLOSE_FRACTION.
-#include // To create test strings like std::basic_string s = S_("0 -0");
+#include // Similar to BOOST_CLOSE_FRACTION.
+#include // To create test strings like std::basic_string s = S_("0 -0");
 #include
 #include
diff --git a/test/test_normal.cpp b/test/test_normal.cpp
index ef984d5e63..e68a1f82e3 100644
--- a/test/test_normal.cpp
+++ b/test/test_normal.cpp
@@ -15,7 +15,9 @@
 // From MathWorld--A Wolfram Web Resource.
 // http://mathworld.wolfram.com/NormalDistribution.html
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include // include directory /libs/math/src/tr1/ is needed.
+#endif
 
 #ifdef _MSC_VER
 # pragma warning (disable: 4127) // conditional expression is constant
@@ -23,15 +25,20 @@
 // and if (std::numeric_limits::has_quiet_NaN)
 #endif
 
-#include
+#include
+
+#include "../include_private/boost/math/tools/test.hpp"
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include // for real_concept
+#endif
+
 #define BOOST_TEST_MAIN
 #include // Boost.Test
 #include
 #include
 using boost::math::normal_distribution;
-#include
 #include "test_out_of_range.hpp"
 #include
diff --git a/test/test_normal_cdf_double.cu b/test/test_normal_cdf_double.cu
new file mode 100644
index 0000000000..cd99d49253
--- /dev/null
+++ b/test/test_normal_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::normal_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch normal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::normal_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!"
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_normal_cdf_float.cu b/test/test_normal_cdf_float.cu new file mode 100644 index 0000000000..c8e422f6e3 --- /dev/null +++ b/test/test_normal_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::normal_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch normal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::normal_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_normal_cdf_nvrtc_double.cpp b/test/test_normal_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..934e18d434 --- /dev/null +++ b/test/test_normal_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_normal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::normal_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_normal_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_normal_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, 
&ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_normal_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::normal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_normal_cdf_nvrtc_float.cpp b/test/test_normal_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..06a2351d2e
--- /dev/null
+++ b/test/test_normal_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_normal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::normal_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_normal_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_normal_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_normal_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] 
= static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::normal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_normal_pdf_double.cu b/test/test_normal_pdf_double.cu
new file mode 100644
index 0000000000..b318023318
--- /dev/null
+++ b/test/test_normal_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::normal_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch normal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::normal_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_normal_pdf_float.cu b/test/test_normal_pdf_float.cu new file mode 100644 index 0000000000..155278fede --- /dev/null +++ b/test/test_normal_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::normal_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch normal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::normal_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_normal_pdf_nvrtc_double.cpp b/test/test_normal_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..216e264dd1 --- /dev/null +++ b/test/test_normal_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_normal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::normal_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_normal_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_normal_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_normal_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::normal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_normal_pdf_nvrtc_float.cpp b/test/test_normal_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..669a1aad26
--- /dev/null
+++ b/test/test_normal_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/normal.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/normal.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_normal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::normal_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_normal_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_normal_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_normal_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::normal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
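Unlike the .cu tests, the NVRTC host programs above are ordinary C++ translation units: the kernel string is compiled at run time by nvrtcCompileProgram, so only the include paths passed in opts[] matter to the device code. A minimal sketch of a host build line, assuming a default CUDA install under /usr/local/cuda and the boost-root checkout used by this CI (both paths are assumptions, not taken from this patch):

    g++ -std=c++14 test_normal_pdf_nvrtc_float.cpp \
        -I/usr/local/cuda/include -I../boost-root/libs/cuda-math/include \
        -L/usr/local/cuda/lib64 -lnvrtc -lcuda -lcudart \
        -o test_normal_pdf_nvrtc_float

Linking against libnvrtc, libcuda, and libcudart mirrors the three headers the file pulls in first.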
diff --git a/test/test_normal_quan_double.cu b/test/test_normal_quan_double.cu
new file mode 100644
index 0000000000..ca7fea427f
--- /dev/null
+++ b/test/test_normal_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/normal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::normal_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch normal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::normal_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
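The .cu tests lean on two small local helpers, cuda_managed_ptr and watch, shipped alongside them in test/cuda_managed_ptr.hpp and test/stopwatch.hpp. Those headers are not part of this patch; the sketch below only illustrates the interface the tests rely on, assuming cudaMallocManaged-backed unified memory and a std::chrono stopwatch:

    // Sketch only -- not the shipped headers.
    #include <cuda_runtime.h>
    #include <chrono>
    #include <cstddef>

    template <class T>
    class cuda_managed_ptr
    {
        T* data_ = nullptr;
    public:
        explicit cuda_managed_ptr(std::size_t n)
        {
            // Unified memory: directly addressable from host and device.
            cudaMallocManaged(&data_, n * sizeof(T));
        }
        ~cuda_managed_ptr() { cudaFree(data_); }
        cuda_managed_ptr(const cuda_managed_ptr&) = delete;
        cuda_managed_ptr& operator=(const cuda_managed_ptr&) = delete;
        T* get() const { return data_; }
        T& operator[](std::size_t i) { return data_[i]; }
        const T& operator[](std::size_t i) const { return data_[i]; }
    };

    class watch
    {
        std::chrono::steady_clock::time_point start_ = std::chrono::steady_clock::now();
    public:
        void reset() { start_ = std::chrono::steady_clock::now(); }
        double elapsed() const
        {
            return std::chrono::duration<double>(std::chrono::steady_clock::now() - start_).count();
        }
    };

Because the storage is managed, the host-side verification loop can read output_vector[i] directly after cudaDeviceSynchronize(), with no explicit cudaMemcpy.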
diff --git a/test/test_normal_quan_float.cu b/test/test_normal_quan_float.cu
new file mode 100644
index 0000000000..ca7fea427f
--- /dev/null
+++ b/test/test_normal_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/normal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::normal_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch normal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::normal_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_normal_quan_nvrtc_double.cpp b/test/test_normal_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..f47650f708
--- /dev/null
+++ b/test/test_normal_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/normal.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/normal.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_normal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::normal_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_normal_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_normal_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_normal_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::normal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_normal_quan_nvrtc_float.cpp b/test/test_normal_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..d988472778
--- /dev/null
+++ b/test/test_normal_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/normal.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/normal.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_normal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::normal_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_normal_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_normal_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_normal_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::normal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_out_of_range.hpp b/test/test_out_of_range.hpp
index a8c93576f4..8b7e723c07 100644
--- a/test/test_out_of_range.hpp
+++ b/test/test_out_of_range.hpp
@@ -8,8 +8,9 @@
 #ifndef BOOST_MATH_TEST_OUT_OF_RANGE_HPP
 #define BOOST_MATH_TEST_OUT_OF_RANGE_HPP
 
+#include <boost/math/tools/config.hpp>
 #include <boost/math/special_functions/next.hpp>
-#include <boost/math/tools/test.hpp>
+#include <boost/math/tools/assert.hpp>
 
 /*` check_out_of_range functions check that bad parameters
 passed to constructors and functions throw domain_error exceptions.
@@ -30,6 +31,11 @@ but does *not* check finite but out-of-range parameters to the constructor
 because these are specific to each distribution.
 */
 
+#if defined(BOOST_CHECK_THROW) && defined(BOOST_MATH_NO_EXCEPTIONS)
+# undef BOOST_CHECK_THROW
+# define BOOST_CHECK_THROW(x, y)
+#endif
+
 #ifdef _MSC_VER
 #pragma warning(push)
 #pragma warning(disable:4127)
@@ -48,60 +54,60 @@ void check_support(const Distro& d, bool Infinite = false)
      value_type m = (range(d).first == 0) ? -boost::math::tools::min_value<value_type>() : boost::math::float_prior(range(d).first);
      BOOST_MATH_ASSERT(m != range(d).first);
      BOOST_MATH_ASSERT(m < range(d).first);
-     BOOST_MATH_CHECK_THROW(pdf(d, m), std::domain_error);
-     BOOST_MATH_CHECK_THROW(cdf(d, m), std::domain_error);
-     BOOST_MATH_CHECK_THROW(cdf(complement(d, m)), std::domain_error);
+     BOOST_CHECK_THROW(pdf(d, m), std::domain_error);
+     BOOST_CHECK_THROW(cdf(d, m), std::domain_error);
+     BOOST_CHECK_THROW(cdf(complement(d, m)), std::domain_error);
   }
   if ((boost::math::isfinite)(range(d).second) && (range(d).second != boost::math::tools::max_value<value_type>()))
   { // If possible, check that a random variable value just more than the top of the supported range throws domain errors.
      value_type m = (range(d).second == 0) ? boost::math::tools::min_value<value_type>() : boost::math::float_next(range(d).second);
      BOOST_MATH_ASSERT(m != range(d).first);
      BOOST_MATH_ASSERT(m > range(d).first);
-     BOOST_MATH_CHECK_THROW(pdf(d, m), std::domain_error);
-     BOOST_MATH_CHECK_THROW(cdf(d, m), std::domain_error);
-     BOOST_MATH_CHECK_THROW(cdf(complement(d, m)), std::domain_error);
+     BOOST_CHECK_THROW(pdf(d, m), std::domain_error);
+     BOOST_CHECK_THROW(cdf(d, m), std::domain_error);
+     BOOST_CHECK_THROW(cdf(complement(d, m)), std::domain_error);
   }
   if (std::numeric_limits<value_type>::has_infinity)
   { // Infinity is available,
     if ((boost::math::isfinite)(range(d).second))
     { // and top of range doesn't include infinity,
       // check that using infinity throws domain errors.
-      BOOST_MATH_CHECK_THROW(pdf(d, std::numeric_limits<value_type>::infinity()), std::domain_error);
-      BOOST_MATH_CHECK_THROW(cdf(d, std::numeric_limits<value_type>::infinity()), std::domain_error);
-      BOOST_MATH_CHECK_THROW(cdf(complement(d, std::numeric_limits<value_type>::infinity())), std::domain_error);
+      BOOST_CHECK_THROW(pdf(d, std::numeric_limits<value_type>::infinity()), std::domain_error);
+      BOOST_CHECK_THROW(cdf(d, std::numeric_limits<value_type>::infinity()), std::domain_error);
+      BOOST_CHECK_THROW(cdf(complement(d, std::numeric_limits<value_type>::infinity())), std::domain_error);
     }
     if ((boost::math::isfinite)(range(d).first))
    { // and bottom of range doesn't include infinity,
      // check that using infinity throws domain_error exception.
-      BOOST_MATH_CHECK_THROW(pdf(d, -std::numeric_limits<value_type>::infinity()), std::domain_error);
-      BOOST_MATH_CHECK_THROW(cdf(d, -std::numeric_limits<value_type>::infinity()), std::domain_error);
-      BOOST_MATH_CHECK_THROW(cdf(complement(d, -std::numeric_limits<value_type>::infinity())), std::domain_error);
+      BOOST_CHECK_THROW(pdf(d, -std::numeric_limits<value_type>::infinity()), std::domain_error);
+      BOOST_CHECK_THROW(cdf(d, -std::numeric_limits<value_type>::infinity()), std::domain_error);
+      BOOST_CHECK_THROW(cdf(complement(d, -std::numeric_limits<value_type>::infinity())), std::domain_error);
    }
    // Check that using infinity with quantiles always throws domain_error exception.
-   BOOST_MATH_CHECK_THROW(quantile(d, std::numeric_limits<value_type>::infinity()), std::domain_error);
-   BOOST_MATH_CHECK_THROW(quantile(d, -std::numeric_limits<value_type>::infinity()), std::domain_error);
-   BOOST_MATH_CHECK_THROW(quantile(complement(d, std::numeric_limits<value_type>::infinity())), std::domain_error);
-   BOOST_MATH_CHECK_THROW(quantile(complement(d, -std::numeric_limits<value_type>::infinity())), std::domain_error);
+   BOOST_CHECK_THROW(quantile(d, std::numeric_limits<value_type>::infinity()), std::domain_error);
+   BOOST_CHECK_THROW(quantile(d, -std::numeric_limits<value_type>::infinity()), std::domain_error);
+   BOOST_CHECK_THROW(quantile(complement(d, std::numeric_limits<value_type>::infinity())), std::domain_error);
+   BOOST_CHECK_THROW(quantile(complement(d, -std::numeric_limits<value_type>::infinity())), std::domain_error);
   }
 }
 if(std::numeric_limits<value_type>::has_quiet_NaN)
 { // NaN is available.
-   BOOST_MATH_CHECK_THROW(pdf(d, std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
-   BOOST_MATH_CHECK_THROW(cdf(d, std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
-   BOOST_MATH_CHECK_THROW(cdf(complement(d, std::numeric_limits<value_type>::quiet_NaN())), std::domain_error);
-   BOOST_MATH_CHECK_THROW(pdf(d, -std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
-   BOOST_MATH_CHECK_THROW(cdf(d, -std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
-   BOOST_MATH_CHECK_THROW(cdf(complement(d, -std::numeric_limits<value_type>::quiet_NaN())), std::domain_error);
-   BOOST_MATH_CHECK_THROW(quantile(d, std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
-   BOOST_MATH_CHECK_THROW(quantile(d, -std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
-   BOOST_MATH_CHECK_THROW(quantile(complement(d, std::numeric_limits<value_type>::quiet_NaN())), std::domain_error);
-   BOOST_MATH_CHECK_THROW(quantile(complement(d, -std::numeric_limits<value_type>::quiet_NaN())), std::domain_error);
+   BOOST_CHECK_THROW(pdf(d, std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
+   BOOST_CHECK_THROW(cdf(d, std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
+   BOOST_CHECK_THROW(cdf(complement(d, std::numeric_limits<value_type>::quiet_NaN())), std::domain_error);
+   BOOST_CHECK_THROW(pdf(d, -std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
+   BOOST_CHECK_THROW(cdf(d, -std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
+   BOOST_CHECK_THROW(cdf(complement(d, -std::numeric_limits<value_type>::quiet_NaN())), std::domain_error);
+   BOOST_CHECK_THROW(quantile(d, std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
+   BOOST_CHECK_THROW(quantile(d, -std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
+   BOOST_CHECK_THROW(quantile(complement(d, std::numeric_limits<value_type>::quiet_NaN())), std::domain_error);
+   BOOST_CHECK_THROW(quantile(complement(d, -std::numeric_limits<value_type>::quiet_NaN())), std::domain_error);
 }
 // Check that using probability outside [0,1] with quantiles always throws domain_error exception.
-BOOST_MATH_CHECK_THROW(quantile(d, -1), std::domain_error);
-BOOST_MATH_CHECK_THROW(quantile(d, 2), std::domain_error);
-BOOST_MATH_CHECK_THROW(quantile(complement(d, -1)), std::domain_error);
-BOOST_MATH_CHECK_THROW(quantile(complement(d, 2)), std::domain_error);
+BOOST_CHECK_THROW(quantile(d, -1), std::domain_error);
+BOOST_CHECK_THROW(quantile(d, 2), std::domain_error);
+BOOST_CHECK_THROW(quantile(complement(d, -1)), std::domain_error);
+BOOST_CHECK_THROW(quantile(complement(d, 2)), std::domain_error);
 }
 
 // Four check_out_of_range versions for distributions with zero to 3 constructor parameters.
@@ -121,12 +127,12 @@ void check_out_of_range(typename Distro::value_type p1)
    check_support(d);
    if(std::numeric_limits<value_type>::has_infinity)
    {
-      BOOST_MATH_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::infinity()), range(d).first), std::domain_error);
-      //  BOOST_MATH_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::infinity()), range(d).second), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::infinity()), range(d).first), std::domain_error);
+      //  BOOST_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::infinity()), range(d).second), std::domain_error);
    }
    if(std::numeric_limits<value_type>::has_quiet_NaN)
    {
-      BOOST_MATH_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::quiet_NaN()), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::quiet_NaN()), range(d).first), std::domain_error);
    }
 }
 
@@ -138,13 +144,13 @@ void check_out_of_range(typename Distro::value_type p1, typename Distro::value_t
    check_support(d);
    if(std::numeric_limits<value_type>::has_infinity)
    {
-      BOOST_MATH_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::infinity(), p2), range(d).first), std::domain_error);
-      BOOST_MATH_CHECK_THROW(pdf(Distro(p1, std::numeric_limits<value_type>::infinity()), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::infinity(), p2), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(p1, std::numeric_limits<value_type>::infinity()), range(d).first), std::domain_error);
    }
    if(std::numeric_limits<value_type>::has_quiet_NaN)
    {
-      BOOST_MATH_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::quiet_NaN(), p2), range(d).first), std::domain_error);
-      BOOST_MATH_CHECK_THROW(pdf(Distro(p1, std::numeric_limits<value_type>::quiet_NaN()), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::quiet_NaN(), p2), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(p1, std::numeric_limits<value_type>::quiet_NaN()), range(d).first), std::domain_error);
    }
 }
 
@@ -156,15 +162,15 @@ void check_out_of_range(typename Distro::value_type p1, typename Distro::value_t
    check_support(d);
    if(std::numeric_limits<value_type>::has_infinity)
    {
-      BOOST_MATH_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::infinity(), p2, p3), range(d).first), std::domain_error);
-      BOOST_MATH_CHECK_THROW(pdf(Distro(p1, std::numeric_limits<value_type>::infinity(), p3), range(d).first), std::domain_error);
-      BOOST_MATH_CHECK_THROW(pdf(Distro(p1, p2, std::numeric_limits<value_type>::infinity()), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::infinity(), p2, p3), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(p1, std::numeric_limits<value_type>::infinity(), p3), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(p1, p2, std::numeric_limits<value_type>::infinity()), range(d).first), std::domain_error);
    }
    if(std::numeric_limits<value_type>::has_quiet_NaN)
    {
-      BOOST_MATH_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::quiet_NaN(), p2, p3), range(d).first), std::domain_error);
-      BOOST_MATH_CHECK_THROW(pdf(Distro(p1, std::numeric_limits<value_type>::quiet_NaN(), p3), range(d).first), std::domain_error);
-      BOOST_MATH_CHECK_THROW(pdf(Distro(p1, p2, std::numeric_limits<value_type>::quiet_NaN()), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::quiet_NaN(), p2, p3), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(p1, std::numeric_limits<value_type>::quiet_NaN(), p3), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(p1, p2, std::numeric_limits<value_type>::quiet_NaN()), range(d).first), std::domain_error);
    }
 }
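With BOOST_CHECK_THROW stubbed out to a no-op under BOOST_MATH_NO_EXCEPTIONS, the same test sources can compile for targets where exceptions are unavailable. A hypothetical caller, using the two-parameter overload since pareto_distribution takes a scale and a shape (the test-case name and parameter values here are illustrative only):

    #define BOOST_TEST_MAIN
    #include <boost/test/unit_test.hpp>
    #include <boost/math/distributions/pareto.hpp>
    #include "test_out_of_range.hpp"

    BOOST_AUTO_TEST_CASE(pareto_out_of_range)
    {
        // Exercises NaN/infinite constructor parameters and out-of-support
        // random variables; compiles to a no-op when exceptions are disabled.
        check_out_of_range<boost::math::pareto_distribution<double>>(1.0, 2.0);
    }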
diff --git a/test/test_owens_t.cpp b/test/test_owens_t.cpp
index 8c33e77f07..11389dd205 100644
--- a/test/test_owens_t.cpp
+++ b/test/test_owens_t.cpp
@@ -38,9 +38,9 @@ using boost::math::owens_t;
 #include <boost/math/tools/stats.hpp>
 #include <boost/math/tools/test.hpp>
 
-#include "libs/math/test/handle_test_result.hpp"
-#include "libs/math/test/table_type.hpp"
-#include "libs/math/test/functor.hpp"
+#include "handle_test_result.hpp"
+#include "table_type.hpp"
+#include "functor.hpp"
 
 #include "boost/math/tools/test_value.hpp"
 #include "test_owens_t.hpp"
diff --git a/test/test_owens_t.hpp b/test/test_owens_t.hpp
index 995446e5fd..a65ba4af94 100644
--- a/test/test_owens_t.hpp
+++ b/test/test_owens_t.hpp
@@ -41,30 +41,28 @@ void test_spots(RealType)
   using ::boost::math::normal_distribution;
   BOOST_MATH_STD_USING // ADL of std names.
 
-  if(std::numeric_limits<RealType>::digits && (std::numeric_limits<RealType>::digits < 100))
-  {
-
-    // Checks of six sub-methods T1 to T6.
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(0.0625L), static_cast<RealType>(0.25L)), static_cast<RealType>(3.89119302347013668966224771378e-2L), tolerance); // T1
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(6.5L), static_cast<RealType>(0.4375L)), static_cast<RealType>(2.00057730485083154100907167685E-11L), tolerance); // T2
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(7L), static_cast<RealType>(0.96875L)), static_cast<RealType>(6.39906271938986853083219914429E-13L), tolerance); // T3
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(4.78125L), static_cast<RealType>(0.0625L)), static_cast<RealType>(1.06329748046874638058307112826E-7L), tolerance); // T4
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(2.L), static_cast<RealType>(0.5L)), static_cast<RealType>(8.62507798552150713113488319155E-3L), tolerance); // T5
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(1.L), static_cast<RealType>(0.9999975L)), static_cast<RealType>(6.67418089782285927715589822405E-2L), tolerance); // T6
-    //BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(L), static_cast<RealType>(L)), static_cast<RealType>(L), tolerance);
-
-    // BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(L), static_cast<RealType>(L)), static_cast<RealType>(L), tolerance);
-
-    // Spots values using Mathematica
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(6.5L), static_cast<RealType>(0.4375L)), static_cast<RealType>(2.00057730485083154100907167684918851101649922551817956120806662022118024594547E-11L), tolerance);
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(0.4375L), static_cast<RealType>(6.5L)), static_cast<RealType>(0.16540130125449396247498691826626273249659241838438244251206819782787761751256L), tolerance);
+  // Checks of six sub-methods T1 to T6.
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(0.0625L), static_cast<RealType>(0.25L)), static_cast<RealType>(3.89119302347013668966224771378499505568e-2L), tolerance); // T1
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(6.5L), static_cast<RealType>(0.4375L)), static_cast<RealType>(2.00057730485083154100907167684918851101649922551817956120806662022118025e-11L), tolerance); // T2
+  if (boost::math::tools::digits<RealType>() < 100) // too large error for 128 bit long double
+     BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(7L), static_cast<RealType>(0.96875L)), static_cast<RealType>(6.3990627193898685308321991442891601376479719094145923322318222572484602e-13L), tolerance); // T3
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(4.78125L), static_cast<RealType>(0.0625L)), static_cast<RealType>(1.06329748046874638058307112826015825291136503488102191050906959246644943e-7L), tolerance); // T4
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(2.L), static_cast<RealType>(0.5L)), static_cast<RealType>(8.6250779855215071311348831915463718787564119039085429110080944948781288e-3L), tolerance); // T5
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(1.L), static_cast<RealType>(0.9999975L)), static_cast<RealType>(6.6741808978228592771558982240461689232406934240709035854119334966793020e-2L), tolerance); // T6
+  //BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(L), static_cast<RealType>(L)), static_cast<RealType>(L), tolerance);
+
+  // BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(L), static_cast<RealType>(L)), static_cast<RealType>(L), tolerance);
+
+  // Spots values using Mathematica
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(6.5L), static_cast<RealType>(0.4375L)), static_cast<RealType>(2.00057730485083154100907167684918851101649922551817956120806662022118024594547E-11L), tolerance);
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(0.4375L), static_cast<RealType>(6.5L)), static_cast<RealType>(0.16540130125449396247498691826626273249659241838438244251206819782787761751256L), tolerance);
+  if (boost::math::tools::digits<RealType>() < 100) // too large error for 128 bit long double
      BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(7.L), static_cast<RealType>(0.96875L)), static_cast<RealType>(6.39906271938986853083219914428916013764797190941459233223182225724846022843930e-13L), tolerance);
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(0.96875L), static_cast<RealType>(7.L)), static_cast<RealType>(0.08316748474602973770533230453272140919966614259525787470390475393923633179072L), tolerance);
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(4.78125L), static_cast<RealType>(0.0625L)), static_cast<RealType>(1.06329748046874638058307112826015825291136503488102191050906959246644942646701e-7L), tolerance);
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(0.0625L), static_cast<RealType>(4.78125L)), static_cast<RealType>(0.21571185819897989857261253680409017017649352928888660746045361855686569265171L), tolerance);
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(2.L), static_cast<RealType>(0.5L)), static_cast<RealType>(0.00862507798552150713113488319154637187875641190390854291100809449487812876461L), tolerance);
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(0.5L), static_cast<RealType>(2L)), static_cast<RealType>(0.14158060365397839346662819588111542648867283386549027383784843786494855594607L), tolerance);
-  }
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(0.96875L), static_cast<RealType>(7.L)), static_cast<RealType>(0.08316748474602973770533230453272140919966614259525787470390475393923633179072L), tolerance);
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(4.78125L), static_cast<RealType>(0.0625L)), static_cast<RealType>(1.06329748046874638058307112826015825291136503488102191050906959246644942646701e-7L), tolerance);
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(0.0625L), static_cast<RealType>(4.78125L)), static_cast<RealType>(0.21571185819897989857261253680409017017649352928888660746045361855686569265171L), tolerance);
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(2.L), static_cast<RealType>(0.5L)), static_cast<RealType>(0.00862507798552150713113488319154637187875641190390854291100809449487812876461L), tolerance);
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(0.5L), static_cast<RealType>(2L)), static_cast<RealType>(0.14158060365397839346662819588111542648867283386549027383784843786494855594607L), tolerance);
 
   // check basic properties
   BOOST_CHECK_EQUAL(owens_t(static_cast<RealType>(0.5L), static_cast<RealType>(2L)), owens_t(static_cast<RealType>(-0.5L), static_cast<RealType>(2L)));
   BOOST_CHECK_EQUAL(owens_t(static_cast<RealType>(0.5L), static_cast<RealType>(2L)), -owens_t(static_cast<RealType>(0.5L), static_cast<RealType>(-2L)));
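The spot values above can be reproduced outside the Boost.Test harness; a minimal sketch, reusing the T(2, 0.5) value quoted from Mathematica in the hunk above:

    #include <boost/math/special_functions/owens_t.hpp>
    #include <cmath>
    #include <iostream>

    int main()
    {
        // T(2, 0.5) spot value from the test above.
        const double expected = 0.00862507798552150713113488319154637187875641190390854291;
        const double computed = boost::math::owens_t(2.0, 0.5);
        std::cout << "owens_t(2, 0.5) = " << computed
                  << ", relative error = " << std::fabs(computed - expected) / expected << '\n';
    }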
diff --git a/test/test_pareto.cpp b/test/test_pareto.cpp
index 35a5bb0098..b59b93f189 100644
--- a/test/test_pareto.cpp
+++ b/test/test_pareto.cpp
@@ -24,15 +24,19 @@
 #  pragma warning(disable: 4100) // unreferenced formal parameter.
 #endif
 
-#include <boost/math/concepts/real_concept.hpp> // for real_concept
+#include <boost/math/tools/config.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
+#include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // Boost.Test
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/math/distributions/pareto.hpp>
     using boost::math::pareto_distribution;
-#include <boost/math/tools/test.hpp>
 #include "test_out_of_range.hpp"
 #include <iostream>
@@ -47,11 +51,13 @@ void check_pareto(RealType scale, RealType shape, RealType x, RealType p, RealType q, RealType tol)
 {
    RealType logtol = tol * 10;
 
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
    BOOST_IF_CONSTEXPR (std::is_same<RealType, long double>::value || std::is_same<RealType, boost::math::concepts::real_concept>::value)
    {
       logtol *= 100;
    }
+   #endif
 
    BOOST_CHECK_CLOSE_FRACTION(
       ::boost::math::cdf(
diff --git a/test/test_pareto_cdf_double.cu b/test/test_pareto_cdf_double.cu
new file mode 100644
index 0000000000..94ca6618a2
--- /dev/null
+++ b/test/test_pareto_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch pareto distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::pareto_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_pareto_cdf_float.cu b/test/test_pareto_cdf_float.cu
new file mode 100644
index 0000000000..7778900a47
--- /dev/null
+++ b/test/test_pareto_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch pareto distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::pareto_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
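Every kernel launch in these tests sizes its grid with the same ceiling-division idiom, which is worth spelling out once:

    // Ceiling division: the smallest block count whose total thread count
    // covers numElements.
    int numElements     = 50000;
    int threadsPerBlock = 256;
    int blocksPerGrid   = (numElements + threadsPerBlock - 1) / threadsPerBlock; // = 196
    // 196 * 256 = 50176 threads are launched; the kernel's
    // "if (i < numElements)" guard retires the 176 surplus threads.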
diff --git a/test/test_pareto_cdf_nvrtc_double.cpp b/test/test_pareto_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..55e7ecbb0a
--- /dev/null
+++ b/test/test_pareto_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_pareto_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_pareto_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_pareto_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_pareto_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::pareto_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_pareto_cdf_nvrtc_float.cpp b/test/test_pareto_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..a9d4f62500
--- /dev/null
+++ b/test/test_pareto_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_pareto_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_pareto_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_pareto_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_pareto_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::pareto_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
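All of the verification loops in these tests express their tolerance in units of machine epsilon via boost::math::epsilon_difference from <boost/math/special_functions/relative_difference.hpp>. A small self-contained illustration:

    #include <boost/math/special_functions/relative_difference.hpp>
    #include <iostream>
    #include <limits>

    int main()
    {
        // epsilon_difference returns the relative difference between two
        // values scaled by machine epsilon for the argument type.
        float a = 1.0f;
        float b = 1.0f + 10 * std::numeric_limits<float>::epsilon();
        std::cout << boost::math::epsilon_difference(a, b) << '\n'; // prints ~10
    }

A threshold of 100 eps (or 300 eps for the NVRTC runs) therefore tolerates modest accumulated rounding differences between the device and host code paths without masking real defects.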
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch pareto distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::pareto_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_pareto_pdf_float.cu b/test/test_pareto_pdf_float.cu
new file mode 100644
index 0000000000..8dbd97311a
--- /dev/null
+++ b/test/test_pareto_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch pareto distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::pareto_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_pareto_pdf_nvrtc_double.cpp b/test/test_pareto_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..6533303cd6
--- /dev/null
+++ b/test/test_pareto_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_pareto_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_pareto_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_pareto_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_pareto_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::pareto_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_pareto_pdf_nvrtc_float.cpp b/test/test_pareto_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..a5d415ae4f
--- /dev/null
+++ b/test/test_pareto_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_pareto_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_pareto_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_pareto_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_pareto_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::pareto_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_pareto_quan_double.cu b/test/test_pareto_quan_double.cu
new file mode 100644
index 0000000000..1d2c47ef93
--- /dev/null
+++ b/test/test_pareto_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch pareto distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::pareto_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_pareto_quan_float.cu b/test/test_pareto_quan_float.cu
new file mode 100644
index 0000000000..410067ea5c
--- /dev/null
+++ b/test/test_pareto_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch pareto distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::pareto_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_pareto_quan_nvrtc_double.cpp b/test/test_pareto_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..59444ee351
--- /dev/null
+++ b/test/test_pareto_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_pareto_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_pareto_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_pareto_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_pareto_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::pareto_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_pareto_quan_nvrtc_float.cpp b/test/test_pareto_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..75eda6bee4
--- /dev/null
+++ b/test/test_pareto_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_pareto_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_pareto_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_pareto_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_pareto_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::pareto_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_poisson.cpp b/test/test_poisson.cpp
index 9b75ce162f..2a0bc4c499 100644
--- a/test/test_poisson.cpp
+++ b/test/test_poisson.cpp
@@ -23,19 +23,24 @@
 # pragma warning(disable: 4127) // conditional expression is constant.
 #endif
 
+#include <boost/math/tools/config.hpp>
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // Boost.Test
 #include <boost/test/tools/floating_point_comparison.hpp>
 
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #include <boost/math/distributions/poisson.hpp>
     using boost::math::poisson_distribution;
-#include <boost/math/concepts/real_concept.hpp> // for real_concept
 #include <boost/math/special_functions/gamma.hpp> // for (incomplete) gamma.
 //   using boost::math::qamma_Q;
 #include "table_type.hpp"
 #include "test_out_of_range.hpp"
+#include "../include_private/boost/math/tools/test.hpp"
 
 #include <iostream>
     using std::cout;
@@ -53,12 +58,12 @@ void test_spots(RealType)
   // guaranteed for type RealType, eg 6 for float, 15 for double,
   // expressed as a percentage (so -2) for BOOST_CHECK_CLOSE,
 
-  int decdigits = numeric_limits<RealType>::digits10;
+  int decdigits = std::numeric_limits<RealType>::digits10; // May be >15 for 80 and 128-bit FP types.
   if (decdigits <= 0)
   { // decdigits is not defined, for example real concept,
    // so assume precision of most test data is double (for example, MathCAD).
-    decdigits = numeric_limits<double>::digits10; // == 15 for 64-bit
+    decdigits = std::numeric_limits<double>::digits10; // == 15 for 64-bit
  }
  if (decdigits > 15 ) // numeric_limits<double>::digits10)
  { // 15 is the accuracy of the MathCAD test data.
@@ -106,6 +111,7 @@ void test_spots(RealType)
   using ::boost::math::pdf;
 
   // Check that bad arguments throw.
+  #ifndef BOOST_MATH_NO_EXCEPTIONS
   BOOST_MATH_CHECK_THROW(
   cdf(poisson_distribution<RealType>(static_cast<RealType>(0)), // mean zero is bad.
   static_cast<RealType>(0)),  // even for a good k.
@@ -155,6 +161,7 @@ void test_spots(RealType)
   quantile(complement(poisson_distribution<RealType>(static_cast<RealType>(1)),
   static_cast<RealType>(0))),  // bad probability.
   std::overflow_error);
+  #endif
 
   BOOST_CHECK_EQUAL(
   quantile(poisson_distribution<RealType>(static_cast<RealType>(1)),
@@ -559,6 +566,7 @@ BOOST_AUTO_TEST_CASE( test_main )
   // poisson mydudpoisson(0.); // throws (if BOOST_MATH_DOMAIN_ERROR_POLICY == throw_on_error).
 
+#ifndef BOOST_MATH_NO_EXCEPTIONS
 #ifndef BOOST_NO_EXCEPTIONS
   BOOST_MATH_CHECK_THROW(poisson mydudpoisson(-1), std::domain_error);// Mean must be > 0.
   BOOST_MATH_CHECK_THROW(poisson mydudpoisson(-1), std::logic_error);// Mean must be > 0.
@@ -570,7 +578,7 @@ BOOST_AUTO_TEST_CASE( test_main )
   // BOOST_MATH_CHECK_THROW(poisson mydudpoisson(-1), std::overflow_error); // fails the check
   // because overflow_error is unrelated - except from std::exception
   BOOST_MATH_CHECK_THROW(cdf(mypoisson, -1), std::domain_error); // k must be >= 0
-
+#endif
   BOOST_CHECK_EQUAL(mean(mypoisson), 4.);
   BOOST_CHECK_CLOSE(
   pdf(mypoisson, 2.), // k events = 2.
@@ -644,7 +652,7 @@ BOOST_AUTO_TEST_CASE( test_main )
   test_spots(0.0); // Test double.
 #endif
 #ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
-  if (numeric_limits<long double>::digits10 > numeric_limits<double>::digits10)
+  if (std::numeric_limits<long double>::digits10 > std::numeric_limits<double>::digits10)
   { // long double is better than double (so not MSVC where they are same).
 #ifdef TEST_LDOUBLE
     test_spots(0.0L); // Test long double.
diff --git a/test/test_poisson_cdf_double.cu b/test/test_poisson_cdf_double.cu
new file mode 100644
index 0000000000..34ca74a622
--- /dev/null
+++ b/test/test_poisson_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/poisson.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::poisson_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch poisson distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::poisson_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_poisson_cdf_float.cu b/test/test_poisson_cdf_float.cu
new file mode 100644
index 0000000000..0c024f6692
--- /dev/null
+++ b/test/test_poisson_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/poisson.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::poisson_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch poisson distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::poisson_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_poisson_cdf_nvrtc_double.cpp b/test/test_poisson_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..81ef15adc6
--- /dev/null
+++ b/test/test_poisson_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/poisson.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/poisson.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_poisson_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::poisson_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_poisson_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_poisson_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_poisson_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::poisson_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_poisson_cdf_nvrtc_float.cpp b/test/test_poisson_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..8e53303873
--- /dev/null
+++ b/test/test_poisson_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/poisson.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/poisson.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_poisson_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::poisson_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_poisson_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_poisson_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_poisson_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::poisson_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_poisson_pdf_double.cu b/test/test_poisson_pdf_double.cu
new file mode 100644
index 0000000000..616fe0ba4f
--- /dev/null
+++ b/test/test_poisson_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
diff --git a/test/test_poisson_pdf_double.cu b/test/test_poisson_pdf_double.cu
new file mode 100644
index 0000000000..616fe0ba4f
--- /dev/null
+++ b/test/test_poisson_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/poisson.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::poisson_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch poisson distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::poisson_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
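The launch configuration in these .cu tests is the standard ceiling division, so the final block covers the ragged tail. A quick check with the numbers used here:

    int numElements     = 50000;
    int threadsPerBlock = 256;
    int blocksPerGrid   = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    // blocksPerGrid == 196, and 196 * 256 = 50176 >= 50000, so every element
    // gets a thread; the "i < numElements" guard in the kernel discards the
    // 176 surplus threads in the last block.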
diff --git a/test/test_poisson_pdf_float.cu b/test/test_poisson_pdf_float.cu
new file mode 100644
index 0000000000..81ac558999
--- /dev/null
+++ b/test/test_poisson_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/poisson.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::poisson_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch poisson distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::poisson_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_poisson_pdf_nvrtc_double.cpp b/test/test_poisson_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..514963d638
--- /dev/null
+++ b/test/test_poisson_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_poisson_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::poisson_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_poisson_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_poisson_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_poisson_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::poisson_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_poisson_pdf_nvrtc_float.cpp b/test/test_poisson_pdf_nvrtc_float.cpp new file mode 100644 index 0000000000..9b79360094 --- /dev/null +++ b/test/test_poisson_pdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_poisson_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::poisson_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_poisson_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_poisson_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_poisson_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::poisson_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_poisson_quan_double.cu b/test/test_poisson_quan_double.cu new file mode 100644 index 0000000000..b1ef1a17c5 --- /dev/null +++ b/test/test_poisson_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/poisson.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::poisson_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch poisson distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::poisson_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
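The 100-eps pass/fail criterion used by these tests measures the GPU/host discrepancy in units of machine epsilon at the magnitude of the values, not as an absolute difference. A rough model of what boost::math::epsilon_difference reports (the hypothetical approx_eps_difference below is only a sketch of the idea, not Boost's implementation):

    #include <algorithm>
    #include <cmath>
    #include <limits>

    template <class T>
    T approx_eps_difference(T a, T b)
    {
        // Relative difference rescaled by the machine epsilon of T:
        // a result of 100 means the values differ by roughly 100 ulps.
        T scale = std::max(std::fabs(a), std::fabs(b));
        if (scale == T(0)) return T(0);
        return std::fabs(a - b) / (scale * std::numeric_limits<T>::epsilon());
    }

This makes one threshold usable for both float and double builds, since the tolerance tracks the precision of the type under test.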
diff --git a/test/test_poisson_quan_float.cu b/test/test_poisson_quan_float.cu
new file mode 100644
index 0000000000..82a28bd882
--- /dev/null
+++ b/test/test_poisson_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/poisson.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::poisson_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch poisson distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::poisson_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_poisson_quan_nvrtc_double.cpp b/test/test_poisson_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..572d57a257
--- /dev/null
+++ b/test/test_poisson_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_poisson_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::poisson_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_poisson_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_poisson_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_poisson_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); 
+ h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::poisson_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_poisson_quan_nvrtc_float.cpp b/test/test_poisson_quan_nvrtc_float.cpp new file mode 100644 index 0000000000..34bd1ea53c --- /dev/null +++ b/test/test_poisson_quan_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_poisson_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::poisson_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_poisson_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_poisson_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_poisson_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::poisson_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at element: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_rayleigh.cpp b/test/test_rayleigh.cpp
index de92dfa848..0d4ebf2a80 100644
--- a/test/test_rayleigh.cpp
+++ b/test/test_rayleigh.cpp
@@ -13,10 +13,15 @@
 # pragma warning(disable: 4100) // unreferenced formal parameter.
 #endif
 
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #include <boost/math/distributions/rayleigh.hpp>
     using boost::math::rayleigh_distribution;
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // Boost.Test
@@ -36,11 +41,13 @@ void test_spot(RealType s, RealType x, RealType p, RealType q, RealType toleranc
 {
    RealType logtolerance = tolerance;
 
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
    BOOST_IF_CONSTEXPR (std::is_same<RealType, long double>::value || std::is_same<RealType, boost::math::concepts::real_concept>::value)
    {
       logtolerance *= 100;
    }
+   #endif
 
    BOOST_CHECK_CLOSE(
       ::boost::math::cdf(
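The test_rayleigh.cpp hunks above show the recurring pattern this patch uses to adapt an existing Boost.Test runner to GPU-enabled builds: the host-only real_concept type is compiled out under BOOST_MATH_NO_REAL_CONCEPT_TESTS, and the widened log tolerance is applied only when no GPU support is configured. A condensed view of the pattern (the helper name adjusted_log_tolerance is illustrative, not from the patch):

    #include <type_traits>
    #include <boost/math/tools/config.hpp>

    #ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
    #include <boost/math/concepts/real_concept.hpp> // host-only test type
    #endif

    template <class RealType>
    RealType adjusted_log_tolerance(RealType tolerance)
    {
    #ifndef BOOST_MATH_HAS_GPU_SUPPORT
        // Wide host-only types take a slower, less precise code path,
        // so their tolerance is relaxed by a factor of 100.
        BOOST_IF_CONSTEXPR (std::is_same<RealType, long double>::value
                            || std::is_same<RealType, boost::math::concepts::real_concept>::value)
        {
            tolerance *= 100;
        }
    #endif
        return tolerance;
    }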
diff --git a/test/test_rayleigh_cdf_double.cu b/test/test_rayleigh_cdf_double.cu
new file mode 100644
index 0000000000..d6056dcaf1
--- /dev/null
+++ b/test/test_rayleigh_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::rayleigh_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch rayleigh distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::rayleigh_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
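The .cu tests index cuda_managed_ptr storage directly from host code both before and after the kernel runs, which implies the helper is built on CUDA unified memory, so no explicit cudaMemcpy is needed. A minimal sketch of such a wrapper, assuming cudaMallocManaged underneath (the real cuda_managed_ptr.hpp helper used by these tests may differ):

    #include <cuda_runtime.h>

    template <class T>
    class managed_array // hypothetical stand-in for cuda_managed_ptr<T>
    {
        T* p_ {nullptr};
    public:
        explicit managed_array(int n) { cudaMallocManaged(&p_, n * sizeof(T)); }
        ~managed_array() { cudaFree(p_); }
        managed_array(const managed_array&) = delete;
        managed_array& operator=(const managed_array&) = delete;
        T* get() const { return p_; }   // the same pointer is valid on host and device
        T& operator[](int i) { return p_[i]; }
        const T& operator[](int i) const { return p_[i]; }
    };

With unified memory the host must still synchronize (cudaDeviceSynchronize) before reading results, which is exactly what the tests do after each launch.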
diff --git a/test/test_rayleigh_cdf_float.cu b/test/test_rayleigh_cdf_float.cu
new file mode 100644
index 0000000000..2c86ec1ba3
--- /dev/null
+++ b/test/test_rayleigh_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::rayleigh_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch rayleigh distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::rayleigh_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_rayleigh_cdf_nvrtc_double.cpp b/test/test_rayleigh_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..f57595d195
--- /dev/null
+++ b/test/test_rayleigh_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_rayleigh_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::rayleigh_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_rayleigh_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_rayleigh_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_rayleigh_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); 
+ h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::rayleigh_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_rayleigh_cdf_nvrtc_float.cpp b/test/test_rayleigh_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..aeeaacadfb --- /dev/null +++ b/test/test_rayleigh_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_rayleigh_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::rayleigh_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_rayleigh_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_rayleigh_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_rayleigh_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::rayleigh_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_rayleigh_pdf_double.cu b/test/test_rayleigh_pdf_double.cu new file mode 100644 index 0000000000..b83ae3cbf0 --- /dev/null +++ b/test/test_rayleigh_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::rayleigh_distribution(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::rayleigh_distribution(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_rayleigh_pdf_float.cu b/test/test_rayleigh_pdf_float.cu new file mode 100644 index 0000000000..a5bfee42da --- /dev/null +++ b/test/test_rayleigh_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
diff --git a/test/test_rayleigh_pdf_float.cu b/test/test_rayleigh_pdf_float.cu
new file mode 100644
index 0000000000..a5bfee42da
--- /dev/null
+++ b/test/test_rayleigh_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::rayleigh_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::rayleigh_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
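// Editor's note: the grid size used throughout these tests is the usual
// ceiling division, which guarantees blocksPerGrid * threadsPerBlock >=
// numElements, so every element is covered and the in-kernel bounds check
// trims the overshoot. A quick self-contained check of the identity
// (illustrative only, not part of this patch):
//
//     #include <cassert>
//     int main()
//     {
//         int numElements = 50000, threadsPerBlock = 256;
//         int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
//         assert(blocksPerGrid * threadsPerBlock >= numElements);        // covers all elements
//         assert((blocksPerGrid - 1) * threadsPerBlock < numElements);   // with no spare block
//         return 0;
//     }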
diff --git a/test/test_rayleigh_pdf_nvrtc_double.cpp b/test/test_rayleigh_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..17662bba07
--- /dev/null
+++ b/test/test_rayleigh_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/tools/config.hpp>
+extern "C" __global__
+void test_rayleigh_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::rayleigh_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_rayleigh_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_rayleigh_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_rayleigh_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::rayleigh_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
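// Editor's note: every *_nvrtc_* test in this patch follows the same
// pipeline: NVRTC compiles the kernel source string to PTX, the driver API
// loads the PTX as a module, and the kernel is fetched by its extern "C"
// name. A condensed sketch of that flow (error handling elided; see the
// check* helpers above for the real checks):
//
//     nvrtcProgram prog;
//     nvrtcCreateProgram(&prog, cuda_kernel, "kernel.cu", 0, nullptr, nullptr);
//     nvrtcCompileProgram(prog, numOpts, opts);            // source -> PTX
//     size_t n; nvrtcGetPTXSize(prog, &n);
//     std::string ptx(n, '\0');
//     nvrtcGetPTX(prog, &ptx[0]);
//     CUmodule mod;  cuModuleLoadDataEx(&mod, ptx.c_str(), 0, nullptr, nullptr);
//     CUfunction f;  cuModuleGetFunction(&f, mod, "test_rayleigh_kernel");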
diff --git a/test/test_rayleigh_pdf_nvrtc_float.cpp b/test/test_rayleigh_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..82cc534e8b
--- /dev/null
+++ b/test/test_rayleigh_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/tools/config.hpp>
+extern "C" __global__
+void test_rayleigh_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::rayleigh_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_rayleigh_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_rayleigh_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_rayleigh_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::rayleigh_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_rayleigh_quan_double.cu b/test/test_rayleigh_quan_double.cu
new file mode 100644
index 0000000000..65084e57cf
--- /dev/null
+++ b/test/test_rayleigh_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::rayleigh_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::rayleigh_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_rayleigh_quan_float.cu b/test/test_rayleigh_quan_float.cu
new file mode 100644
index 0000000000..7a03396646
--- /dev/null
+++ b/test/test_rayleigh_quan_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::rayleigh_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::rayleigh_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
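// Editor's note: cuda_managed_ptr (from the test support header
// "cuda_managed_ptr.hpp", which is not part of this patch) owns CUDA unified
// memory, which is why the same pointer can be handed to the kernel and then
// read back on the host with no explicit copies. A minimal sketch of the
// idea, assuming only the CUDA runtime API (illustrative, not the actual
// helper):
//
//     #include <cstddef>
//     #include <cuda_runtime.h>
//
//     template <class T>
//     struct managed_buffer
//     {
//         T* p = nullptr;
//         explicit managed_buffer(std::size_t n) { cudaMallocManaged(&p, n * sizeof(T)); }
//         ~managed_buffer() { cudaFree(p); }
//         T* get() const { return p; }
//         T& operator[](std::size_t i) const { return p[i]; }
//     };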
diff --git a/test/test_rayleigh_quan_nvrtc_double.cpp b/test/test_rayleigh_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..c105b50470
--- /dev/null
+++ b/test/test_rayleigh_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/tools/config.hpp>
+extern "C" __global__
+void test_rayleigh_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::rayleigh_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_rayleigh_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_rayleigh_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_rayleigh_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::rayleigh_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_rayleigh_quan_nvrtc_float.cpp b/test/test_rayleigh_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..88c9418a32
--- /dev/null
+++ b/test/test_rayleigh_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/tools/config.hpp>
+extern "C" __global__
+void test_rayleigh_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::rayleigh_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_rayleigh_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_rayleigh_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_rayleigh_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::rayleigh_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
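// Editor's note: the "> 300" and "> 100.0" checks in these tests measure
// error in units of machine epsilon: boost::math::epsilon_difference(a, b)
// reports how far apart a and b are as a multiple of epsilon for the type.
// A hedged sketch of one common hand-rolled variant of such a check (not the
// exact Boost.Math definition):
//
//     #include <algorithm>
//     #include <cmath>
//     #include <limits>
//
//     bool within_eps(double a, double b, double max_eps)
//     {
//         // scale an absolute difference by the larger magnitude, then
//         // compare against max_eps machine epsilons
//         const double scale = std::max(std::fabs(a), std::fabs(b));
//         return std::fabs(a - b) <= max_eps * std::numeric_limits<double>::epsilon() * scale;
//     }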
diff --git a/test/test_round.cpp b/test/test_round.cpp
index 95ff4d234e..e603aa510d 100644
--- a/test/test_round.cpp
+++ b/test/test_round.cpp
@@ -3,12 +3,20 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wimplicit-const-int-float-conversion"
+#endif
 #include <boost/math/special_functions/round.hpp>
 #define BOOST_TEST_MAIN
+#include <boost/math/tools/config.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include <boost/test/unit_test.hpp>
-#include <boost/math/tools/test.hpp>
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/random/mersenne_twister.hpp>
 #include <boost/random/uniform_real_distribution.hpp>
@@ -222,6 +230,7 @@ void test_round(T, const char* name )
    //
    // Finish off by testing the error handlers:
    //
+   #ifndef BOOST_MATH_NO_EXCEPTIONS
    BOOST_MATH_CHECK_THROW(iround(static_cast<T>(1e20)), boost::math::rounding_error);
    BOOST_MATH_CHECK_THROW(iround(static_cast<T>(-1e20)), boost::math::rounding_error);
    BOOST_MATH_CHECK_THROW(lround(static_cast<T>(1e20)), boost::math::rounding_error);
@@ -314,6 +323,7 @@ void test_round(T, const char* name )
    BOOST_MATH_CHECK_THROW(llround(static_cast<T>((std::numeric_limits<long long>::min)()) - 1), boost::math::rounding_error);
 }
 #endif
+   #endif
 //
 // try non-throwing error handlers:
 //
diff --git a/test/test_round_double.cu b/test/test_round_double.cu
new file mode 100644
index 0000000000..3dae4342d2
--- /dev/null
+++ b/test/test_round_double.cu
@@ -0,0 +1,98 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/round.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::round(in[i]) + boost::math::iround(in[i]) + boost::math::lround(in[i]) + boost::math::llround(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(4 * boost::math::round(h_A[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_round_float.cu b/test/test_round_float.cu
new file mode 100644
index 0000000000..45dd14c03a
--- /dev/null
+++ b/test/test_round_float.cu
@@ -0,0 +1,98 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/round.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::round(in[i]) + boost::math::iround(in[i]) + boost::math::lround(in[i]) + boost::math::llround(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(4 * boost::math::round(h_A[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
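// Editor's note: the round tests above sum round/iround/lround/llround on the
// device but verify against 4 * round(x) on the host. That only works because
// the inputs are confined to [0, 1), where all four rounding overloads return
// the same value (0 or 1), representable in every result type. A small
// illustration of the identity being relied on:
//
//     double x = 0.75;   // any value in [0, 1)
//     double sum = boost::math::round(x) + boost::math::iround(x)
//                + boost::math::lround(x) + boost::math::llround(x);
//     // here sum == 4 * boost::math::round(x)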
diff --git a/test/test_round_nvrtc_double.cpp b/test/test_round_nvrtc_double.cpp
new file mode 100644
index 0000000000..228e3dd674
--- /dev/null
+++ b/test/test_round_nvrtc_double.cpp
@@ -0,0 +1,194 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/special_functions/round.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <iostream>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/round.hpp>
+#include <boost/math/tools/config.hpp>
+extern "C" __global__
+void test_round_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::round(in1[i]) +
+                 boost::math::lround(in1[i]) +
+                 boost::math::llround(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_round_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_round_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_round_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::round(h_in1[i]) +
+                       boost::math::lround(h_in1[i]) +
+                       boost::math::llround(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_round_nvrtc_float.cpp b/test/test_round_nvrtc_float.cpp
new file mode 100644
index 0000000000..8554add7cd
--- /dev/null
+++ b/test/test_round_nvrtc_float.cpp
@@ -0,0 +1,194 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/special_functions/round.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <iostream>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/round.hpp>
+#include <boost/math/tools/config.hpp>
+extern "C" __global__
+void test_round_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::round(in1[i]) +
+                 boost::math::lround(in1[i]) +
+                 boost::math::llround(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_round_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_round_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_round_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::round(h_in1[i]) +
+                       boost::math::lround(h_in1[i]) +
+                       boost::math::llround(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_saspoint5.cpp b/test/test_saspoint5.cpp
new file mode 100644
index 0000000000..0703b3c547
--- /dev/null
+++ b/test/test_saspoint5.cpp
@@ -0,0 +1,987 @@
+// Copyright Takuma Yoshimura 2024.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_TEST_MAIN
+#define BOOST_TEST_MODULE StatsSaSpoint5Test
+#include <boost/test/unit_test.hpp>
+#include <boost/test/tools/floating_point_comparison.hpp>
+#include <boost/math/tools/big_constant.hpp>
+
+#include <boost/math/distributions/saspoint5.hpp>
+
+#if __has_include(<stdfloat>)
+#  include <stdfloat>
+#endif
+
+using boost::math::saspoint5_distribution;
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+#include <boost/multiprecision/cpp_bin_float.hpp>
+using boost::multiprecision::cpp_bin_float_quad;
+#endif
+
+template <typename RealType, int N>
+void do_test_saspoint5_pdf(){
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    saspoint5_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.65057384221262866484014802392420311075288403543570e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.79329640523490376041131493419821198600076403419386e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.95852304370396879516224023732198088002401888082616e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.15165745993244539388932384769132623478818505719254e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.37991930003932826612228434406591616554240049257806e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.65315905048420909110038030496372707676321065455853e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.98514782971078642902580240066249560381976244868252e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.39569569206548434200616180477229996876301302607033e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.91428580496513429479068747515164587814473831035141e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.22747038893846641865142698258984886289652875016810e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.58664432176873307685856460747711890398474760309135e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.00211113907132159419276991035240896253998639909692e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.48740303942235865293078214071409846807501918299929e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.06062847902700317213276816720705695621399130112975e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.74654707859965380374478388584454386653113140787247e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.57987022193031994457557921317658430036146780413966e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.61071469126041183247373313827161939453635781053656e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.22226841107155902731102159116498022415283445125970e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.91402704796678469983705063964301389721348302634810e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.07020274682586388030767450697192728366834992164365e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.16067678880616390663660038956460612725510361666743e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.26548452861398188870931570373322249360362213948984e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38813584257594010463041381178865326769684234839162e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.53335207865128979807776513257284069794567700115319e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.70762401725206223811383500786268939644546879037607e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.46875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.80837633035190650461893908063251050472045384954446e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.92009924039028830545431687288769864540254439571691e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.40625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.04458865222194329452641393707783622253522148720927e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.18403802814813998631607652585109350067000954260159e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.34375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.34114858767964975455487653703370079404113295500766e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.51927454673218629749392967500746624956301580234179e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.28125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.72261384495736629760803695406845321684342258638651e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.95645445681747568731488283573032414811445124048278e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.234375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.08683711301287379599769504729521857091008674908403e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.21875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.22747777451211671698876953149906906288207621283740e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.203125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.37948342659965914836420291897431271791569870739942e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.54408462624872191505363112311495187968578947455635e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.171875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.72263350586688041299651258469337654343439789762344e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.15625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.91658567295250072264436343404335704809676265611623e-1), tolerance);
4.12745086277094097472011430871049070050383274543942e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.35668401768623200524372663239480799018368629449958e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.109375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.60546312221207321659735213679473165482665805082365e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.09375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.87425051951658593562735199923143476116556674561953e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.078125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.16193612095139713576014676574899762241637086348666e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.46416716200748206779925127900698754118588044244443e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.046875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.77011156717390291621839083396306231100301720494794e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.03125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.05643252618763782827233685490846659295644445464474e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.015625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.27802677165093171536432271430054096075199851236277e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.36619772367581343075535053490057448137838582961826e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.015625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.27802677165093171536432271430054096075199851236277e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.03125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.05643252618763782827233685490846659295644445464474e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.046875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.77011156717390291621839083396306231100301720494794e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.46416716200748206779925127900698754118588044244443e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.078125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.16193612095139713576014676574899762241637086348666e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.09375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.87425051951658593562735199923143476116556674561953e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.109375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.60546312221207321659735213679473165482665805082365e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.35668401768623200524372663239480799018368629449958e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.140625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.12745086277094097472011430871049070050383274543942e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.15625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.91658567295250072264436343404335704809676265611623e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.171875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.72263350586688041299651258469337654343439789762344e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.54408462624872191505363112311495187968578947455635e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.203125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
3.37948342659965914836420291897431271791569870739942e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.21875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.22747777451211671698876953149906906288207621283740e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.234375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.08683711301287379599769504729521857091008674908403e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.95645445681747568731488283573032414811445124048278e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.28125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.72261384495736629760803695406845321684342258638651e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.51927454673218629749392967500746624956301580234179e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.34375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.34114858767964975455487653703370079404113295500766e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.18403802814813998631607652585109350067000954260159e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.40625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.04458865222194329452641393707783622253522148720927e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.92009924039028830545431687288769864540254439571691e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.46875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.80837633035190650461893908063251050472045384954446e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.70762401725206223811383500786268939644546879037607e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.53335207865128979807776513257284069794567700115319e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38813584257594010463041381178865326769684234839162e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.26548452861398188870931570373322249360362213948984e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.16067678880616390663660038956460612725510361666743e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.07020274682586388030767450697192728366834992164365e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.91402704796678469983705063964301389721348302634810e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.22226841107155902731102159116498022415283445125970e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.61071469126041183247373313827161939453635781053656e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.57987022193031994457557921317658430036146780413966e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.74654707859965380374478388584454386653113140787247e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.06062847902700317213276816720705695621399130112975e-2), 
tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.48740303942235865293078214071409846807501918299929e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.00211113907132159419276991035240896253998639909692e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.58664432176873307685856460747711890398474760309135e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.22747038893846641865142698258984886289652875016810e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.91428580496513429479068747515164587814473831035141e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.39569569206548434200616180477229996876301302607033e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.98514782971078642902580240066249560381976244868252e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.65315905048420909110038030496372707676321065455853e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.37991930003932826612228434406591616554240049257806e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.15165745993244539388932384769132623478818505719254e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.95852304370396879516224023732198088002401888082616e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.79329640523490376041131493419821198600076403419386e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.65057384221262866484014802392420311075288403543570e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41709229627396868333284301965586098495341505333984e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.23486804023715403906392442935982382364217653266772e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.08928004905124817201928007015908544910529569714342e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.70693662562544123549117032234725245555978806476399e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.72521377242204601673499906339041894791681660981780e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.90118208003638158949103509569466271651423947601700e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(7.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.20122199033087670451578082577537221533524657616689e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(8)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.60044810497290557552736366450372523266254544297541e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(9)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.62568921883700157286068393009259359194911515759460e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(10)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
4.87225538372111615796471446753804828223647849403478e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(11)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.27521869884978603565623102805698827447589635270965e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(12)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.79233594205638037575121942287372720508125859480601e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(13)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.39503653350126602107748523248767674409207708020015e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(14)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.06336366766043497753645671560287680725185122482037e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(15)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.78299184327161285281451711213360177146934142134357e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(16)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.54339461777955741686401041938275102206830951452701e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(18)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.15681965291001751496355126714032815564803295371462e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(20)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.85998635069315959390913560004569618374730778555584e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(22)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.62608332259613979271666391679559272984023328035102e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(24)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.43781372306723685704327334057789272970358761256651e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(26)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28356349911089078230470059243188541053449454702148e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(28)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.15527095704472843921748976306796551576061214402975e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(30)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.04718107132157571106811924450948266398728667364202e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(32)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.55085695067883584460317653567009454037053097890033e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(36)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.07065059415060466523722913682664006023733154400679e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(40)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.93920671056491824666303660784084772546983031349823e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(44)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.05105752932345415433215445597062488386383300986381e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(48)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.33854958380909922933873661422920090995959934982337e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(52)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.75647800724537927733394825316181366013592737695448e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(56)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.27359757459764665859327891341356484542630879530500e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(60)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.86768631746691871635029152331878689826668170171440e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(64)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.52254909340415842254122300688608224784456787996634e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(72)), 
BOOST_MATH_BIG_CONSTANT(RealType, N, 2.96929220521963282370464091269016751022713088743640e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(80)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.54771289809160518824087477899271848785553979294988e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(88)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.21766390014321750130289247373333697019024113873128e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(96)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.95349215386026125880558883477711894508813402521643e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(104)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.73811374936907293362277490498742346151511716286084e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(112)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55975461659552125943580943693269482505834989982945e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(120)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41006225150819455490222552628817791206744288460867e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(128)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28296324273402304808354336538549844914883481532620e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(256)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.63180321475387230691526892287847669196997380044025e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(512)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.66189768326912251607424407981263646223230063365226e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1024)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.93708011998897739786377954224930716533616609090213e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 11)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.11452967162743112245198615236484567129951718982008e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 12)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.51482080833564338340856966996445313498079083789150e-7), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 13)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.66663174159211670027542189178751103023898387998523e-7), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 14)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.45238046039194445466823557091765258247216574897413e-8), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 15)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.34803516847185617806978815778928715951216310353916e-8), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 16)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.18523956611649861747653810989133680515605456258881e-8), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 17)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.19428394481581920558479056092124822410228982935516e-9), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 18)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.48386121491270229946980639019289494044106626390164e-9), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 19)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.24863743761365464204033370116809431054495238229389e-10), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 20)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.85627269567198410070703889816912945617135110666962e-10), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 21)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.56441321254434701892659687588069630508140502616495e-11), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 22)), 
BOOST_MATH_BIG_CONSTANT(RealType, N, 2.32124514546671241752437061147193743959791071682877e-11), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 23)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.20777750941928700289773286319085009877829580100394e-12), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 24)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.90212173401654051439660118394241371322264056105160e-12), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.02611352372467016891796952205325499928128114461973e-12), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 26)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.62800552193521241168029742332979605534754328327140e-13), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 27)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28273024622868202243706785685607188210679822726442e-13), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 28)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.53522776159999787175268870767580373267754737032134e-14), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 29)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.60346802350897629386145672448930505388450307742542e-14), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 30)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.66917274294807913528606004426646232913424627695599e-15), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 31)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.00436953991320274750778716874695704836278902566390e-15), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 32)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.08655220551513998962815689594959332706015306211632e-16), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 34)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.85824418029941626524148561830736428793658611772587e-17), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 36)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.10728389276235510083072832489179009106826041868534e-17), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 38)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38410697234730043795694534456878302211221269628364e-18), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 40)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.73013503193177856102694285471291153594530345945373e-19), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 42)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.16266961272612513551592941445333504133461857377298e-20), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 44)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.70333753016489787852265460543736089676535476354358e-21), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 46)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.37917223411693427580584068017775560157489239248184e-22), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 48)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.22396549352793655444565334974277876088154820156655e-23), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 50)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.27995699246102965377026386970340969776925728507708e-24), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 52)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.59994631904573126677044584818697892085174708564778e-25), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 54)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
8.24993294785056705165902599588868178759415543962830e-26), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 56)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.03124162154653357770314062605026847259176035189343e-26), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 58)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28905202884892491063675049720005913844624989838025e-27), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 60)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.61131503725850485091152332729977897011733755140179e-28), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 62)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.01414379732147400935170577498436447103578962194983e-29), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 64)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.51767974711955685286218349809111390785355323964808e-30), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 68)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.93387460542241032622871979569574240917519314612402e-32), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 72)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.14667907118661876897809516737349031422411025903664e-34), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 76)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.60418604881272566803193825546858269428369981726767e-36), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 80)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.50065407013025533256374121391266749023560191365933e-37), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 84)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.34477198457980010827684344075634340204878641095256e-39), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 88)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.66370622590643616572397339764831428700168401193312e-41), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 92)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.72454097797900123415519497865404920881954749456838e-43), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 96)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.94459527809226549290322908699069488543719245241219e-45), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 100)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.39759301220191945453705676879390798465474357652431e-46), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 104)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.18373908156550030836685714809779087770458087234145e-48), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 108)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.41209231494609468520317755439405311521523337977429e-50), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 112)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.33139424210327312273151307737011597702780857146510e-52), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 116)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.33030350328636432344828142894916983611711484679173e-54), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 120)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.30160992238849442824114913911543137689436308410131e-55), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 124)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.03376550373202254518240301652494105481741383790265e-57), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 128)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.17775859958128522725985138779564208904631966988869e-59), 
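+
+// BOOST_CHECK_CLOSE interprets its tolerance argument as a percentage, so the
+// epsilon * 100 * 3 above permits a relative error of three machine epsilons.
+// N is the binary precision forwarded to BOOST_MATH_BIG_CONSTANT. A minimal
+// usage sketch (hypothetical instantiations; the actual dispatch code is not
+// shown in this diff):
+//
+//   do_test_saspoint5_pdf<double, 53>();              // double: 53-bit significand
+//   do_test_saspoint5_pdf<cpp_bin_float_quad, 113>(); // quad: 113-bit significand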
+
+template <class RealType, int N>
+void do_test_saspoint5_cdf() {
+    //
+    // Basic sanity checks; tolerance is 5 epsilon,
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 5;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    saspoint5_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.63772802979087199762340235165979754525757604354946e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.68073422703516098355866522837852256596174289760926e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.72758003279298484112934948051066338979626419965674e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.77889244858892026253876673752518849640459231269905e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.83545491848533781581075416061045687904524946502798e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.89826216632004296533051251749295777627320352902276e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.96860081561627764765290065134912151446894162470366e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.04817161935798121677981868384882359833404907918648e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.13928162275383718405630406427822962549133076160983e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.19012422407024648274998126765803685575224367865758e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.24515968592015292591213160991283529212368507398551e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.30502444449412087831256855850010593081747948385277e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.37050277816586024016699127500442336442368833061485e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.44257500572741387471454711747681504603506105875273e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.52248669635662507718372623246751628200366838868998e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.61185078990430196469782036568483593931021694091782e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.71280312689343266367958859259591543958449635451967e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.76849301027297680034801351032711397512725349083505e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.82824834763715399283433728056507088064285422281461e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.89261854625965316722646909500096608624627558333255e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.96226648191670403196061310683376239616840103866983e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.03800118279447955882543375356729458148212335628057e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.12082296340545811234455810077892380496395042488318e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.21198706286302016964660139725707925695690953535621e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.31309550000758082761278726632760757082965788779064e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.46875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.36800649890180441699589040706573844882916813862746e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.42623318957604837956085530683032551753868317236287e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.40625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.48814554885607965224431388327096964758526950639963e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.55417563843010824151990624091505209837963398189766e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.34375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.62483167142022658662698467701475815382565060700827e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.70071604494776495147661422048036562690579901259931e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.28125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.78254859359158098510888509896172431097522748703865e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.87119665000174806422420129219814480076318292452197e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.234375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.91839717977573455659490354583900768691773107778703e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.21875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.96771371640645784475148651296909803541218278211201e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.203125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.01931503128208807344707441840522746424640776733482e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.07338815387886469109799016905025356718337763965952e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.171875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.13014030144742808960599738063482904351064602625491e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.15625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.18980066583859588646492309993087622866113891170991e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.140625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.25262169970425461499760639161103103516585531687363e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.31887921568009055676985827521151927561905069756334e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.109375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.38886999325100940692217415763934459424688336515323e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.09375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.46290440538810971901075463907912052110770078557001e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.078125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.54128928252324244858536397279015277684698852998387e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.62429173448713128429248489740624277005917720602166e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.046875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.71206593195666184191280277407391829224588075252830e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.03125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.80450931389242086819408627721643501783105413645046e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.015625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.90099745314478063577240966101000818037412904585595e-1), tolerance);
+    BOOST_CHECK_EQUAL(cdf(dist, static_cast<RealType>(0)), static_cast<RealType>(0.5));
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.015625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.09900254685521936422759033898999181962587095414405e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.03125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.19549068610757913180591372278356498216894586354954e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.046875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.28793406804333815808719722592608170775411924747170e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.37570826551286871570751510259375722994082279397834e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.078125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.45871071747675755141463602720984722315301147001613e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.09375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.53709559461189028098924536092087947889229921442999e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.109375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.61113000674899059307782584236065540575311663484677e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.68112078431990944323014172478848072438094930243666e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.140625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.74737830029574538500239360838896896483414468312637e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.15625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.81019933416140411353507690006912377133886108829009e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.171875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.86985969855257191039400261936517095648935397374509e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.92661184612113530890200983094974643281662236034048e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.203125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.98068496871791192655292558159477253575359223266518e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.21875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.03228628359354215524851348703090196458781721788799e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.234375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.08160282022426544340509645416099231308226892221297e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.12880334999825193577579870780185519923681707547803e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.28125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.21745140640841901489111490103827568902477251296135e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.29928395505223504852338577951963437309420098740069e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.34375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.37516832857977341337301532298524184617434939299173e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.44582436156989175848009375908494790162036601810234e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.40625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.51185445114392034775568611672903035241473049360037e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.57376681042395162043914469316967448246131682763713e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.46875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.63199350109819558300410959293426155117083186137254e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.68690449999241917238721273367239242917034211220936e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.78801293713697983035339860274292074304309046464379e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.87917703659454188765544189922107619503604957511682e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.96199881720552044117456624643270541851787664371943e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.03773351808329596803938689316623760383159896133017e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.10738145374034683277353090499903391375372441666745e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.17175165236284600716566271943492911935714577718539e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.23150698972702319965198648967288602487274650916495e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.28719687310656733632041140740408456041550364548033e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.38814921009569803530217963431516406068978305908218e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.47751330364337492281627376753248371799633161131002e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.55742499427258612528545288252318495396493894124727e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.62949722183413975983300872499557663557631166938515e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.69497555550587912168743144149989406918252051614723e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.75484031407984707408786839008716470787631492601449e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.80987577592975351725001873234196314424775632134242e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.86071837724616281594369593572177037450866923839017e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.95182838064201878322018131615117640166595092081352e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.03139918438372235234709934865087848553105837529634e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.10173783367995703466948748250704222372679647097724e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.16454508151466218418924583938954312095475053497202e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.22110755141107973746123326247481150359540768730095e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.27241996720701515887065051948933661020373580034326e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.31926577296483901644133477162147743403825710239074e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.36227197020912800237659764834020245474242395645054e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.43871020523024507614851721583135267180360280594143e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.50483092818015575884212092733928976209364127755594e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.56280494417603102056412672973803628491156640138235e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.61420731826426116856340074125786936047174397075840e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.66021349395697519291462973006173132761227963124588e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.70172157747564156568159926049840862439194389470164e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(7.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.73943162478672589326613255747804240803057053984280e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(8)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.77389877435125713385213180379500898219171021326151e-1), tolerance);
+}
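+
+// In the complemented checks below, cdf(complement(dist, x)) evaluates the
+// upper tail 1 - CDF(x) directly; far into the tail this avoids the
+// cancellation that the naive 1 - cdf(dist, x) would suffer.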
+
+template <class RealType, int N>
+void do_test_saspoint5_ccdf() {
+    //
+    // Basic sanity checks; tolerance is 5 epsilon,
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 5;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    saspoint5_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-2))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.86071837724616281594369593572177037450866923839017e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.80987577592975351725001873234196314424775632134242e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.75484031407984707408786839008716470787631492601449e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.69497555550587912168743144149989406918252051614723e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.62949722183413975983300872499557663557631166938515e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.55742499427258612528545288252318495396493894124727e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.47751330364337492281627376753248371799633161131002e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.38814921009569803530217963431516406068978305908218e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.28719687310656733632041140740408456041550364548033e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.9375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.23150698972702319965198648967288602487274650916495e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.17175165236284600716566271943492911935714577718539e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.8125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.10738145374034683277353090499903391375372441666745e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.03773351808329596803938689316623760383159896133017e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.6875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.96199881720552044117456624643270541851787664371943e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.87917703659454188765544189922107619503604957511682e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.5625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.78801293713697983035339860274292074304309046464379e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.68690449999241917238721273367239242917034211220936e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.46875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.63199350109819558300410959293426155117083186137254e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.4375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.57376681042395162043914469316967448246131682763713e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.40625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.51185445114392034775568611672903035241473049360037e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.44582436156989175848009375908494790162036601810234e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.34375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.37516832857977341337301532298524184617434939299173e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.3125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.29928395505223504852338577951963437309420098740069e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.28125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.21745140640841901489111490103827568902477251296135e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.12880334999825193577579870780185519923681707547803e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.234375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.08160282022426544340509645416099231308226892221297e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.21875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.03228628359354215524851348703090196458781721788799e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.203125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.98068496871791192655292558159477253575359223266518e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.1875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.92661184612113530890200983094974643281662236034048e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.171875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.86985969855257191039400261936517095648935397374509e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.15625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.81019933416140411353507690006912377133886108829009e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.140625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.74737830029574538500239360838896896483414468312637e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.68112078431990944323014172478848072438094930243666e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.109375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.61113000674899059307782584236065540575311663484677e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.09375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.53709559461189028098924536092087947889229921442999e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.078125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.45871071747675755141463602720984722315301147001613e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.0625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.37570826551286871570751510259375722994082279397834e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.046875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.28793406804333815808719722592608170775411924747170e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.03125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.19549068610757913180591372278356498216894586354954e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.015625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.09900254685521936422759033898999181962587095414405e-1), tolerance);
+    BOOST_CHECK_EQUAL(cdf(complement(dist, static_cast<RealType>(0))), static_cast<RealType>(0.5));
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.015625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.90099745314478063577240966101000818037412904585595e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.03125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.80450931389242086819408627721643501783105413645046e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.046875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.71206593195666184191280277407391829224588075252830e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.0625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.62429173448713128429248489740624277005917720602166e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.078125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.54128928252324244858536397279015277684698852998387e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.09375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.46290440538810971901075463907912052110770078557001e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.109375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.38886999325100940692217415763934459424688336515323e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.31887921568009055676985827521151927561905069756334e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.140625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.25262169970425461499760639161103103516585531687363e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.15625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.18980066583859588646492309993087622866113891170991e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.171875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.13014030144742808960599738063482904351064602625491e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.1875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.07338815387886469109799016905025356718337763965952e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.203125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.01931503128208807344707441840522746424640776733482e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.21875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.96771371640645784475148651296909803541218278211201e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.234375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.91839717977573455659490354583900768691773107778703e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.87119665000174806422420129219814480076318292452197e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.28125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.78254859359158098510888509896172431097522748703865e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.3125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.70071604494776495147661422048036562690579901259931e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.34375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.62483167142022658662698467701475815382565060700827e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.55417563843010824151990624091505209837963398189766e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.40625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.48814554885607965224431388327096964758526950639963e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.4375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.42623318957604837956085530683032551753868317236287e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.46875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.36800649890180441699589040706573844882916813862746e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.31309550000758082761278726632760757082965788779064e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.5625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.21198706286302016964660139725707925695690953535621e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.12082296340545811234455810077892380496395042488318e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.6875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.03800118279447955882543375356729458148212335628057e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.96226648191670403196061310683376239616840103866983e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.8125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.89261854625965316722646909500096608624627558333255e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.82824834763715399283433728056507088064285422281461e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.9375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.76849301027297680034801351032711397512725349083505e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.71280312689343266367958859259591543958449635451967e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.61185078990430196469782036568483593931021694091782e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.52248669635662507718372623246751628200366838868998e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.44257500572741387471454711747681504603506105875273e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.37050277816586024016699127500442336442368833061485e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.30502444449412087831256855850010593081747948385277e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.24515968592015292591213160991283529212368507398551e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.19012422407024648274998126765803685575224367865758e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.13928162275383718405630406427822962549133076160983e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.04817161935798121677981868384882359833404907918648e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.96860081561627764765290065134912151446894162470366e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.89826216632004296533051251749295777627320352902276e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.83545491848533781581075416061045687904524946502798e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.77889244858892026253876673752518849640459231269905e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.72758003279298484112934948051066338979626419965674e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.68073422703516098355866522837852256596174289760926e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(4))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.63772802979087199762340235165979754525757604354946e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(4.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.56128979476975492385148278416864732819639719405857e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.49516907181984424115787907266071023790635872244406e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(5.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.43719505582396897943587327026196371508843359861765e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(6))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38579268173573883143659925874213063952825602924160e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(6.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.33978650604302480708537026993826867238772036875412e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(7))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.29827842252435843431840073950159137560805610529836e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(7.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.26056837521327410673386744252195759196942946015720e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(8))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.22610122564874286614786819620499101780828978673850e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(9))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.16519065397529967010603916749661994058495038701903e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(10))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.11285389689081092090080900757256655213056507935765e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(11))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.06722664628240641657463412013080864534542465320880e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.02697048057896404675748265840787538239835533463915e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(13))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.91095580948192894243059119812739170064298855954150e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.58851606064421509151228732938834025845710632753865e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(15))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.29657717833196446423749763633852786438048712229891e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.03056141356415128156562790092782169373827214514422e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(18))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.56233191903709448828683239881723764461808662475751e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.16188637156340219345609662100276117883253664060901e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(22))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.81416233095397134117651222065771104114974953207875e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(24))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.50842302656587649324679534871796140718152459081608e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(26))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.23677655844367661796208750471724232185503310585430e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(28))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.99327211922253387063577514052912042212460501121145e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(30))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.77332469896087134873409606852930319306153102107748e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(32))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.57333571766941514095434647381791053890536948900264e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(36))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.22230201122276543582018268125914307770458422539453e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(40))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.92306196750274052986046861933822660319647411503460e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(44))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.66393766871935144778495176368042799313533760396735e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(48))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.43664512355365430998504793608427289643995825810352e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(52))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.23512057902480812783067842173364430254180391574517e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(56))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.05480824780342286194892111115606787981112335506282e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(60))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.89220914007246019306457129229565668476439227449653e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(64))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.74458482861654721048545411630851774084511858223315e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(72))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.48596753801598503319767014729339663622931424118435e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(80))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.26600858175294196667322269091614319302918711248774e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(88))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.07590501853150263567490077970815440178521221113294e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(96))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.90943298811551585430796926339912286222583383628380e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(104))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.76204970249877025738751243150107820229031119377257e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(112))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.63035057455899788326457066144517427412403198177255e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(120))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.51172649975372596610965298504076329424502797908092e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(128))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.40413948757894194852726127978788270443438529203793e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(256))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.43203077043778334466271831424077221758122536074329e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(512))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.73229441904414627006839605354679934626132938387870e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1024))), BOOST_MATH_BIG_CONSTANT(RealType, N,
1.23125360026260788506665554837055341098621632464561e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 11))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.73810855742053432677942037710993481024582345070051e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.19474376202028990304398162068685909212949561120262e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 13))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.38834786517841980543748022239405103577545566369058e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.10703837128619218169124143528829380601241157169895e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 15))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.19901414203078529527146967144846039577487804212104e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55594175267868082494169767822936567533407971376816e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 17))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.10071922510385832628139939726551776872500893306030e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 18))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.78577261178721830451003518231079692441875499186244e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 19))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.50662913793980803259595258646776816982578828753293e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.89440319684962022373915418699465975439163763242915e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 21))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.75407315047618220330559697044875151329985165407760e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 22))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.94758093727122517797618473870421858045294374184298e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 23))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.37722626166484277233388378972371910745163900023301e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 24))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.73885317860649699113798763978865018837655503068780e-5), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.88660557570383272842403146191017664814974769956447e-5), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 26))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.86966373050763507830869601193566593204764987942763e-5), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 27))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.44342136111091391837428138940493220527760482550819e-5), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 28))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.43489115282263847728003495903341128041631655237825e-5), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 29))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.72174032467196796417055336934387675311978746399112e-5), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 30))), BOOST_MATH_BIG_CONSTANT(RealType, N, 
1.21746039858699001180528503217054534969400689600469e-5), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 31))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.60877573465331935753972340571396643508918647400950e-6), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 32))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.08733904872845891588154720869252063263709578283331e-6), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 34))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.04367878835689816266760026397728945536920490441769e-6), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 36))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.52184171018215269383027201178304809813110038062619e-6), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 38))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.60921434092694304578619454290395143640352517265958e-7), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 40))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.80460861796838148522601893634284307293180095878460e-7), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 42))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.90230467086052636671801670850090200893179111799503e-7), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 44))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.51152425899360606075482273396798501430313790237214e-8), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 46))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.75576235566956348303137380307065627552780450075955e-8), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 48))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.37788123437797396666202471539368169920940171968838e-8), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 50))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.18894063132478530361545271176786170012121225204887e-8), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 52))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.94470319196342264878568431977547823579033279922390e-9), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 54))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.97235160481658539831961233459162678510644673997599e-9), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 56))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.48617580461701122279770683403051652704243983949025e-9), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 58))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.43087902860685242952854599081275466963995418723234e-10), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 60))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.71543951568387529445493346979934712370552753482953e-10), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 62))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.85771975818704991725083904340140685678094729181896e-10), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 64))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.28859879179803026143850997564073108091243061664589e-11), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 68))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.32214969811127894197045507227361880900936362117960e-11), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 72))), BOOST_MATH_BIG_CONSTANT(RealType, N, 
5.80537424537930446531251492646463803649696387208569e-12), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 76))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.45134356135114531072734934087281275202470211834375e-12), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 80))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.62835890338181277331789748787296614195690507700714e-13), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 84))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.07089725845700036860694806306823770374585029986489e-14), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 88))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.26772431461440436935874981470647001703932928470872e-14), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 92))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.66931078653610734665125754039671239552191451228014e-15), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 96))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41732769663403286311621332289317114802389526570992e-15), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 100))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.54331924158508592432390764336465547336588796613693e-16), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 104))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.85829810396271716489312806849560623979223766702760e-17), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 108))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.21457452599067943835349195212917512291401637194850e-17), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 112))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.53643631497669868784011108970123778268444560432089e-18), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 116))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38410907874417467770730159801145325661085046788106e-18), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 120))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.46027269686043669786030013601997303312653750436687e-19), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 124))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.65068174215109174689577917816952001659129997288134e-20), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 128))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.16267043553777293686425909692485421878259294596496e-20), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 136))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.35166902221110808556756707276211088178619562345223e-21), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 144))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.44793138881942553480799933647572308735410707938645e-23), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 152))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.27995711801214095925541775450484763569750411131894e-24), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 160))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.29997319875758809953465243130019854989301769477462e-25), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 168))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.06248324922349256220915840763819709280743596647028e-26), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 176))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28905203076468285138072402969870025328294044417034e-27), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 184))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.05657519227926782112952519535313715734583596556713e-29), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 192))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.03535949517454238820595324747603340202581381400690e-30), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 200))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.14709968448408899262872077968737723090225341248585e-31), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 208))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.96693730280255562039295048730519109566688181166036e-32), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 216))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.22933581425159726274559405456576710378996415685244e-33), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 224))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.68334883907248289215996284103605325376468341179834e-35), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 232))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.80209302442030180759997677564753362950438836016578e-36), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 240))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.00130814026268862974998548477970853195201855431423e-37), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 248))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.87581758766418039359374092798731783299781533977493e-38), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 256))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.17238599229011274599608807999207364564425192108311e-39), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_saspoint5_quantile_nearzero() {
+    //
+    // Basic sanity checks, tolerance is 4 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 4;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    saspoint5_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.03125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.52796721097108753422708089760626414214332697170320e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.56591342761460650504994018321276991271746451691815e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.09375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.47218526924747883249737601803820876470332879082284e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -7.64871492892195438064623231330402224448304864505255e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.15625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.49147047287704191198103513188406591369158240312651e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.83944503273842706198239703540854699099743899944658e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.21875)), BOOST_MATH_BIG_CONSTANT(RealType, N,
-1.88121977453918311545713678336944882698543050262508e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.28383277518932774280834618691095083936296378494548e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.28125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.91036143728268221909726623759154622815926077224027e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.21997830440617027092737461125926863143124423787158e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.34375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.31665804788247227155363433182318290115115268840234e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.93390030300024509983060478023436359505370694478274e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.40625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.90586867715715827365143687189250571789458607638023e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.12403044179836064404720613683569098344014116798151e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.46875)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.11633396058323297953641398479658668551534240629654e-2), tolerance); + BOOST_CHECK_EQUAL(quantile(dist, static_cast(0.5)), static_cast(0)); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.50390625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.14052785123109810231692510348602868600338388040310e-3), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5078125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.23083326042460532375320968182992121472721389674838e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.51171875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.85291421003937677871303076502465267120545270922005e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.515625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.48262870167331460723959759110260709264416995145257e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.51953125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.12205802380990657930467930989682569659575359795940e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5234375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.77305954522603022893044033304410291628163249182374e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.52734375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.43730759016907991505397132799991250199870494073045e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.53125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.11633396058323297953641398479658668551534240629654e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.53515625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.81156327963950319747789851203804452183342931095156e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5390625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.52434225749394951914392360992042907273807357938464e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.54296875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.25596345497536099309066715771745797836543079457304e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.546875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.00768452063066360042362619764345937296973490700293e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.55078125)), 
BOOST_MATH_BIG_CONSTANT(RealType, N, 8.78074386337778547450756599814721014853736307848400e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5546875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.57637357991828558350746829889151355918095876787015e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.55859375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.03958102936522630889618724151593050682836910322233e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.12403044179836064404720613683569098344014116798151e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.56640625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21111282406179691889755301310617804156919563608730e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5703125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.30095831353205038001599004861145346134019254008840e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.57421875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.39370061391699662824182194486817335276363125501648e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.578125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.48947760819317620418221654282830443278848716355952e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.58203125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.58843194157470428889620974461757508836357970580066e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5859375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.69071158647072305786338619311971877757990751767576e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.58984375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.79647039926214909876835259387875129672714154918583e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.59375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.90586867715715827365143687189250571789458607638023e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.59765625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.01907372223400262964766551738092686471437958693421e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6015625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.13626041895044652657368022843673732366221034591882e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.60546875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.25761183081873425453317860727230431668344830552276e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.609375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.38331982156037948249435913385386178518604561255635e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.61328125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.51358570582737493855890017214322139090955378416415e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6171875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.64862093447749446836087591919859178171321397783436e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.62109375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.78864781940101196225997767818887450385653350744028e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.93390030300024509983060478023436359505370694478274e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.62890625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.08462477761234306446751675008291773968116735262918e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6328125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
3.24108096043381713255460633677010126705310806557902e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.63671875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.40354282984953644763765300071916088017186215793874e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.640625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.57229962948879265758087216105113103390988022371525e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.64453125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.74765694682819607503814996847853363329594489637064e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6484375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.92993787373911729744782959203924101663354892145487e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.65234375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.11948425704142844251280879133218574177911117813611e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.65625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.31665804788247227155363433182318290115115268840234e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.66015625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.52184275961932281071152630025400546639852185492776e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6640625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.73544504485420224870167294387230375351476906471546e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.66796875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.95789640337012040028773901682608788684052806162084e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.671875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.18965503395137956448285166074873893264110804474852e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.67578125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.43120784446902465897667277460048286927884165223763e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6796875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.68307263618494854564544144712177429500269729880351e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.68359375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.94580048000377446115526240745459082802949048053747e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.21997830440617027092737461125926863143124423787158e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.69140625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.50623171706258215659862497663103390044283888606793e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6953125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.80522808468923275824853139897350453182651092750420e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.69921875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.11767989861122001397308542198440114248503523449816e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.703125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.44434845679017252964098546334886010522317885397184e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.70703125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.78604789681373771559537778013045202059314559478553e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7109375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.14364961859827390982162302693689202778040961541097e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.71484375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
8.51808714040276392410190679405041748405632099066392e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.71875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.91036143728268221909726623759154622815926077224027e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.72265625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.32154681743453457545426144342359967492966930439508e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7265625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.75279739912087792579527536186650514912572075425034e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.73046875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.02053542591698211050464719677242867419786774375833e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.734375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.06805533335867623214314225100786133910606465280065e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.73828125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.11798341618055383937616799179262778846061718089667e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7421875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.17047495787852935040070049482482614635865914736031e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.74609375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.22569764738178929583883189036464601993502267035808e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28383277518932774280834618691095083936296378494548e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.75390625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.34507656531879179894649466860868632699377270813348e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7578125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.40964166091862115574755023135179331832042209281055e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.76171875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.47775878407059212254277747469428558567366208287249e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.765625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.54967859343857147744530902523828155595548983145552e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.76953125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.62567376708606422065437126777269201178590320302836e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7734375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.70604134209343637939962724226064374287142808011846e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.77734375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.79110534768099925573827334628551158224393429767853e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.78125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.88121977453918311545713678336944882698543050262508e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.78515625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.97677193016829061858313922289253621707890698703331e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7890625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.07818623846627168681622781867868137142587154742683e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.79296875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.18592855185448213952914641315720989891399211112618e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.796875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.30051105624503972344131618629751440966474716683287e0), 
tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.80078125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.42249786355837299731799089007913700561632679250929e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8046875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.55251140382947389347453508427822893893843094122006e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.80859375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.69123974986228153051487214813181004658778348371858e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.83944503273842706198239703540854699099743899944658e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.81640625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.99797313731027706223638793579547090613393780037951e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8203125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.16776490443789287489784195960321745577830473993117e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.82421875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.34986911285371709243134965339463237363883153982305e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.828125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.54545757031320957733016491289957363442390624403356e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.83203125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.75584271388037850609173432108976499052095906002965e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8359375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.98249820637230758503593250844352437505576086329541e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.83984375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.22708312477451358281191586486620734769643769191868e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.84375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.49147047287704191198103513188406591369158240312651e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.84765625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.77778092239392917026904003217947497405149113745180e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8515625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.08842290488213149257806983557860643076120157707468e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.85546875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.42614045478507551744169142637868564301201400254633e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.859375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.79407056051931174009446309499422812782537696684469e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.86328125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.19581224082198008577426461565974789703165385453255e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8671875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.63551016172006234258563545412611777992137495354692e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.87109375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.11795639223606059741678567601858859535625176228939e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.64871492892195438064623231330402224448304864505255e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.87890625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.23427499059001976769144078614154782774804457875826e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, 
static_cast(0.8828125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.88224092195729153863789585576784499265806702821498e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.88671875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.60156902856277721120026862113617294726340622812143e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.890625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.04028650542151264864039545865194654420659149847715e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.89453125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.12987606827387901549180798300970689324385087503567e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8984375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.23043939515379786765482759320371211340959481379476e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.90234375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.34380276304930757031534891808152959303504163146383e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.90625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.47218526924747883249737601803820876470332879082284e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.91015625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.61830428929231043118455947432627297107033412348157e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9140625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.78551541642651806413112158903680754166702207111037e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.91796875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.97800037388343052469605081868946702260361591387557e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.921875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.20102262690774549296442657868648686186513838548331e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.92578125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.46128002888129351171085541318961326325857070786293e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9296875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.76739893404884608939800784860050078072327340444416e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.93359375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.13063845566677402478557805956745594172754985803934e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.56591342761460650504994018321276991271746451691815e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.94140625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.09331197973287229185946564870900349250152411988374e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9453125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.74040079933008774660558150020826896333899909298154e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.94921875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.54582183342592176371744225018671216049948426069910e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.953125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.56507752301931033036206963972726950780990330962767e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.95703125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.88016814096703401470072036376924072379132872310869e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9609375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.61631402738793734570515994292749381948933242893841e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.96484375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
1.19723988112353634841352379780826428736159712449728e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.96875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.52796721097108753422708089760626414214332697170320e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.97265625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.01231735395463433145333990641210588531111270293944e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.9765625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.76159370958398793120791299241979313515017333512222e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.98046875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.00925470915587723915099888946681205221515647834867e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.984375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.31534559333932129219229424636026382581685146602733e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.98828125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.13177617125304414676334752220512104772421667697176e3), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.9921875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.56685855095635428319020462289713380105568378947326e3), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.99609375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.03488985899360430847226178216145298481879311104883e4), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_saspoint5_quantile_lower() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    saspoint5_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -3)), BOOST_MATH_BIG_CONSTANT(RealType, N, -7.64871492892195438064623231330402224448304864505255e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -4)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.56591342761460650504994018321276991271746451691815e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -5)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.52796721097108753422708089760626414214332697170320e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -6)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.31534559333932129219229424636026382581685146602733e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -7)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.56685855095635428319020462289713380105568378947326e3), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -8)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.03488985899360430847226178216145298481879311104883e4), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -10)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.66560111810464968102166352094329039797553809013180e5), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -12)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.66887306794201818857999976857911823063870328368667e6), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -14)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.27176145418643796488192673589719523675773715325372e7), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -16)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.83544414827242601690013182749435284100680743652971e8), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -20)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.74992376775466783667682507089110333268769521176506e11), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -24)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.47981285598233127388251456426050883440628660913084e13), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -28)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.14683221929996578514587273759806303045535542474016e16), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -32)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.93589050191487067535089237582921756933019831739543e18), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -40)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.92406520022739246958562710299618130085960993793414e23), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -48)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.26095536962330863369654555704556352597738045353834e28), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -56)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.26379711036337395023730103376706642065822106379442e32), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -64)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.41576207424774090175793327287164715152292673169682e37), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -80)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.32605209918111709774537445734765990902394758994645e47), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -96)), BOOST_MATH_BIG_CONSTANT(RealType, N, -9.99031769477504631556283515692397586421188586846623e56), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -112)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.29080877757089340022276728331106401823387140732677e66), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -128)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.84288833730567254754590245964397936517161760904520e76), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -160)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.39952895147018642535639799349517299692324064545541e95), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -192)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.27102405389367073393026479530690706073270941007595e114), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -224)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.15679975802253118434756797269580371455350883468455e134), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -256)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.13391890807707704630144658385551537641074743396505e153), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -320)), BOOST_MATH_BIG_CONSTANT(RealType, N, -7.26134976857812288039693249651849278418162312282066e191), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -384)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.47090928629257240812467048839513197145079590955310e230), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -448)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.40806860386563308738645719151345111937971699513862e268), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_saspoint5_quantile_upper() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    saspoint5_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -3))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.64871492892195438064623231330402224448304864505255e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -4))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.56591342761460650504994018321276991271746451691815e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.52796721097108753422708089760626414214332697170320e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -6))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.31534559333932129219229424636026382581685146602733e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -7))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.56685855095635428319020462289713380105568378947326e3), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -8))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.03488985899360430847226178216145298481879311104883e4), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -10))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.66560111810464968102166352094329039797553809013180e5), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.66887306794201818857999976857911823063870328368667e6), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.27176145418643796488192673589719523675773715325372e7), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.83544414827242601690013182749435284100680743652971e8), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.74992376775466783667682507089110333268769521176506e11), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_saspoint5_locscale_param() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    saspoint5_distribution<RealType> dist_0_1(static_cast<RealType>(0), static_cast<RealType>(1));
+    saspoint5_distribution<RealType> dist_1_3(static_cast<RealType>(1), static_cast<RealType>(3));
+
+    BOOST_CHECK_CLOSE(entropy(dist_0_1), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.6399244456803064957308496039071853510), tolerance);
+    BOOST_CHECK_CLOSE(entropy(dist_1_3), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.6399244456803064957308496039071853510) + log(static_cast<RealType>(3)), tolerance);
+
+    BOOST_CHECK_EQUAL(median(dist_0_1), static_cast<RealType>(0));
+    BOOST_CHECK_EQUAL(median(dist_1_3), static_cast<RealType>(1));
+
+    BOOST_CHECK_EQUAL(mode(dist_0_1), static_cast<RealType>(0));
+    BOOST_CHECK_EQUAL(mode(dist_1_3), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(pdf(dist_0_1, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.61071469126041183247373313827161939453635781053656e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist_1_3, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.36619772367581343075535053490057448137838582961826e-1) / 3, tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, static_cast<RealType>(2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.86071837724616281594369593572177037450866923839017e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, static_cast<RealType>(7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.86071837724616281594369593572177037450866923839017e-1), tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, quantile(dist_0_1, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.25), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, quantile(dist_1_3, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.25), tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, quantile(dist_0_1, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.75), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, quantile(dist_1_3, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.75), tolerance);
+}
+
+BOOST_AUTO_TEST_CASE(saspoint5_pdf_fp64)
+{
+    do_test_saspoint5_pdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(saspoint5_pdf_std64)
+{
+    do_test_saspoint5_pdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(saspoint5_pdf_fp128)
+{
+    do_test_saspoint5_pdf<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(saspoint5_cdf_fp64)
+{
+    do_test_saspoint5_cdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(saspoint5_cdf_std64)
+{
+    do_test_saspoint5_cdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(saspoint5_cdf_fp128)
+{
+    do_test_saspoint5_cdf<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(saspoint5_ccdf_fp64)
+{
+    do_test_saspoint5_ccdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(saspoint5_ccdf_std64)
+{
+    do_test_saspoint5_ccdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(saspoint5_ccdf_fp128)
+{
+    do_test_saspoint5_ccdf<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(saspoint5_quantile_nearzero_fp64)
+{
+    do_test_saspoint5_quantile_nearzero<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(saspoint5_quantile_nearzero_std64)
+{
+    do_test_saspoint5_quantile_nearzero<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(saspoint5_quantile_nearzero_fp128)
+{
+    do_test_saspoint5_quantile_nearzero<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(saspoint5_quantile_lower_fp64)
+{
+    do_test_saspoint5_quantile_lower<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(saspoint5_quantile_lower_std64)
+{
+    do_test_saspoint5_quantile_lower<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(saspoint5_quantile_lower_fp128)
+{
+    do_test_saspoint5_quantile_lower<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(saspoint5_quantile_upper_fp64)
+{
+    do_test_saspoint5_quantile_upper<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(saspoint5_quantile_upper_std64)
+{
+    do_test_saspoint5_quantile_upper<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(saspoint5_quantile_upper_fp128)
+{
+    do_test_saspoint5_quantile_upper<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(saspoint5_locscale_fp64)
+{
+    do_test_saspoint5_locscale_param<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(saspoint5_locscale_std64)
+{
+    do_test_saspoint5_locscale_param<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(saspoint5_locscale_fp128)
+{
+    do_test_saspoint5_locscale_param<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
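A note on the tolerances used throughout these drivers: BOOST_CHECK_CLOSE takes its third argument as a percentage, which is why every tolerance above is machine epsilon scaled by 100 and a small ulp factor. A minimal standalone sketch of that arithmetic (illustrative only, not part of the patch):

    #include <boost/math/tools/precision.hpp>
    #include <iostream>

    int main()
    {
        // BOOST_CHECK_CLOSE(a, b, tol) passes when a and b agree to within
        // tol percent, so epsilon * 100 * 4 permits roughly 4 ulp of error.
        double eps = boost::math::tools::epsilon<double>(); // 2^-52 for binary64
        std::cout << "4-epsilon tolerance [%]: " << eps * 100 * 4 << '\n';
    }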
diff --git a/test/test_saspoint5_cdf_double.cu b/test/test_saspoint5_cdf_double.cu
new file mode 100644
index 0000000000..fb3e2f74c8
--- /dev/null
+++ b/test/test_saspoint5_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::saspoint5_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
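Both .cu tests size their launch with the ceiling-division idiom blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock, so the grid covers every element and the in-kernel bounds check discards the surplus threads. A host-only sketch of the arithmetic (illustrative, no CUDA required):

    #include <cstdio>

    int main()
    {
        int numElements = 50000;
        int threadsPerBlock = 256;
        // Integer ceiling division: (50000 + 255) / 256 = 196 blocks,
        // i.e. 196 * 256 = 50176 threads; the kernel's `if (i < numElements)`
        // guard leaves the extra 176 threads idle.
        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
        std::printf("%d blocks of %d threads\n", blocksPerGrid, threadsPerBlock);
    }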
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::saspoint5_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::saspoint5_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_saspoint5_cdf_nvrtc_double.cpp b/test/test_saspoint5_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..ff2067fa0c --- /dev/null +++ b/test/test_saspoint5_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/saspoint5.hpp>
+#include
+extern "C" __global__
+void test_saspoint5_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_saspoint5_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_saspoint5_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_saspoint5_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::saspoint5_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_saspoint5_cdf_nvrtc_float.cpp b/test/test_saspoint5_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..11c816da1c
--- /dev/null
+++ b/test/test_saspoint5_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/saspoint5.hpp>
+#include
+extern "C" __global__
+void test_saspoint5_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_saspoint5_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_saspoint5_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_saspoint5_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::saspoint5_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_saspoint5_pdf_double.cu b/test/test_saspoint5_pdf_double.cu
new file mode 100644
index 0000000000..5392a328bf
--- /dev/null
+++ b/test/test_saspoint5_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::saspoint5_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_saspoint5_pdf_float.cu b/test/test_saspoint5_pdf_float.cu
new file mode 100644
index 0000000000..01fbcd472b
--- /dev/null
+++ b/test/test_saspoint5_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::saspoint5_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_saspoint5_pdf_nvrtc_double.cpp b/test/test_saspoint5_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..4c74443836
--- /dev/null
+++ b/test/test_saspoint5_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/saspoint5.hpp>
+#include
+extern "C" __global__
+void test_saspoint5_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_saspoint5_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_saspoint5_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_saspoint5_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::saspoint5_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_saspoint5_pdf_nvrtc_float.cpp b/test/test_saspoint5_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..8cd93aaa94
--- /dev/null
+++ b/test/test_saspoint5_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/saspoint5.hpp>
+#include
+extern "C" __global__
+void test_saspoint5_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_saspoint5_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_saspoint5_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_saspoint5_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::saspoint5_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_saspoint5_quan_double.cu b/test/test_saspoint5_quan_double.cu
new file mode 100644
index 0000000000..7415f06906
--- /dev/null
+++ b/test/test_saspoint5_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::saspoint5_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_saspoint5_quan_float.cu b/test/test_saspoint5_quan_float.cu
new file mode 100644
index 0000000000..d6f49084bb
--- /dev/null
+++ b/test/test_saspoint5_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::saspoint5_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_saspoint5_quan_nvrtc_double.cpp b/test/test_saspoint5_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..631ac4b243
--- /dev/null
+++ b/test/test_saspoint5_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/saspoint5.hpp>
+#include
+extern "C" __global__
+void test_saspoint5_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_saspoint5_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_saspoint5_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_saspoint5_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::saspoint5_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_saspoint5_quan_nvrtc_float.cpp b/test/test_saspoint5_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..fa152622e6
--- /dev/null
+++ b/test/test_saspoint5_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/saspoint5.hpp>
+#include
+extern "C" __global__
+void test_saspoint5_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_saspoint5_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_saspoint5_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_saspoint5_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::saspoint5_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_sign.cpp b/test/test_sign.cpp
index 864d2dd121..530a60d503 100644
--- a/test/test_sign.cpp
+++ b/test/test_sign.cpp
@@ -1,5 +1,6 @@
-#define BOOST_TEST_MAIN// Copyright John Maddock 2008
+// Copyright John Maddock 2008
 // (C) Copyright Paul A. Bristow 2011 (added tests for changesign)
+// Copyright Matt Borland 2024
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -147,7 +148,9 @@ BOOST_AUTO_TEST_CASE( test_main )
   test_spots(0.0, "double"); // Test double. OK at decdigits 7, tolerance = 1e07 %
   // long double support for the sign functions is considered "core" so we always test it
   // even when long double support is turned off via BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
+#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
   test_spots(0.0L, "long double"); // Test long double.
+#endif
 #ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
   test_spots(boost::math::concepts::real_concept(0), "real_concept"); // Test real_concept.
 #endif
diff --git a/test/test_sign_nvrtc_double.cpp b/test/test_sign_nvrtc_double.cpp
new file mode 100644
index 0000000000..0951f9ef68
--- /dev/null
+++ b/test/test_sign_nvrtc_double.cpp
@@ -0,0 +1,193 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <iostream>
+#include <cmath>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/sign.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/sign.hpp>
+extern "C" __global__
+void test_gamma_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::signbit(in1[i])
+                 + boost::math::changesign(in1[i])
+                 + boost::math::copysign(in1[i], in2[i])
+                 + boost::math::sign(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_gamma_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::signbit(h_in1[i])
+                       + boost::math::changesign(h_in1[i])
+                       + boost::math::copysign(h_in1[i], h_in2[i])
+                       + boost::math::sign(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_sign_nvrtc_float.cpp b/test/test_sign_nvrtc_float.cpp
new file mode 100644
index 0000000000..6e07f1996a
--- /dev/null
+++ b/test/test_sign_nvrtc_float.cpp
@@ -0,0 +1,193 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
diff --git a/test/test_sign_nvrtc_float.cpp b/test/test_sign_nvrtc_float.cpp
new file mode 100644
index 0000000000..6e07f1996a
--- /dev/null
+++ b/test/test_sign_nvrtc_float.cpp
@@ -0,0 +1,193 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/sign.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/sign.hpp>
+extern "C" __global__
+void test_gamma_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::signbit(in1[i])
+               + boost::math::changesign(in1[i])
+               + boost::math::copysign(in1[i], in2[i])
+               + boost::math::sign(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_gamma_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::signbit(h_in1[i])
+                     + boost::math::changesign(h_in1[i])
+                     + boost::math::copysign(h_in1[i], h_in2[i])
+                     + boost::math::sign(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_sin_pi_double.cu b/test/test_sin_pi_double.cu
new file mode 100644
index 0000000000..0783d55363
--- /dev/null
+++ b/test/test_sin_pi_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/sin_pi.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::sin_pi(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::sin_pi(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
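Worked example of the launch arithmetic above: blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock is integer ceiling division, so with numElements = 50000 and threadsPerBlock = 1024 it yields (50000 + 1023) / 1024 = 49 blocks, i.e. 49 * 1024 = 50176 threads in total; the if (i < numElements) guard inside the kernel masks off the 176 surplus threads.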
diff --git a/test/test_sin_pi_float.cu b/test/test_sin_pi_float.cu
new file mode 100644
index 0000000000..9a9f075807
--- /dev/null
+++ b/test/test_sin_pi_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/sin_pi.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::sin_pi(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::sin_pi(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_sin_pi_nvrtc_double.cpp b/test/test_sin_pi_nvrtc_double.cpp
new file mode 100644
index 0000000000..b6cff9798b
--- /dev/null
+++ b/test/test_sin_pi_nvrtc_double.cpp
@@ -0,0 +1,186 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/sin_pi.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/sin_pi.hpp>
+extern "C" __global__
+void test_sin_pi_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::sin_pi(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sin_pi_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_sin_pi_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_sin_pi_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::sin_pi(h_in1[i]);
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_sin_pi_nvrtc_float.cpp b/test/test_sin_pi_nvrtc_float.cpp
new file mode 100644
index 0000000000..f67079774f
--- /dev/null
+++ b/test/test_sin_pi_nvrtc_float.cpp
@@ -0,0 +1,186 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/sin_pi.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/sin_pi.hpp>
+extern "C" __global__
+void test_sin_pi_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::sin_pi(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sin_pi_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_sin_pi_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_sin_pi_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::sin_pi(h_in1[i]);
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_sinh_sinh_quad_double.cu b/test/test_sinh_sinh_quad_double.cu
new file mode 100644
index 0000000000..bf7490fa4b
--- /dev/null
+++ b/test/test_sinh_sinh_quad_double.cu
@@ -0,0 +1,133 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <limits>
+#include <boost/math/quadrature/sinh_sinh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+__host__ __device__ float_type func(float_type x)
+{
+    BOOST_MATH_STD_USING
+    return 1/(1+x*x);
+}
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    float_type tol = boost::math::tools::root_epsilon<float_type>();
+    float_type error;
+    float_type L1;
+    boost::math::size_t levels;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = M_PI * (static_cast<float_type>(i) / numElements);
+    }
+
+    // Launch the CUDA Kernel
+    int threadsPerBlock = 512;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    float_type tol = boost::math::tools::root_epsilon<float_type>();
+    float_type error;
+    float_type L1;
+    boost::math::quadrature::sinh_sinh<float_type> integrator;
+    for(int i = 0; i < numElements; ++i)
+    {
+        results.push_back(integrator.integrate(func, tol, &error, &L1));
+    }
+    double t = w.elapsed();
+    // check the results
+    int failed_count = 0;
+    for(int i = 0; i < numElements; ++i)
+    {
+        const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
+        if (eps > 10)
+        {
+            std::cerr << std::setprecision(std::numeric_limits<float_type>::digits10)
+                      << "Result verification failed at element " << i << "!\n"
+                      << "Device: " << output_vector[i]
+                      << "\n  Host: " << results[i]
+                      << "\n   Eps: " << eps << "\n";
+            failed_count++;
+        }
+        if (failed_count > 100)
+        {
+            break;
+        }
+    }
+
+    if (failed_count != 0)
+    {
+        std::cout << "Test FAILED" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
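Both sinh-sinh kernels integrate $f(x) = 1/(1+x^2)$ over the whole real line, so every slot of the output vector should converge to the same closed-form value,

\[
\int_{-\infty}^{\infty} \frac{dx}{1+x^{2}} \;=\; \bigl[\arctan x\bigr]_{-\infty}^{\infty} \;=\; \pi,
\]

which the quadrature reaches via the double-exponential substitution $x = \sinh\!\bigl(\tfrac{\pi}{2}\sinh t\bigr)$ that maps $(-\infty, \infty)$ onto itself.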
diff --git a/test/test_sinh_sinh_quad_float.cu b/test/test_sinh_sinh_quad_float.cu
new file mode 100644
index 0000000000..b84e316af9
--- /dev/null
+++ b/test/test_sinh_sinh_quad_float.cu
@@ -0,0 +1,133 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <limits>
+#include <boost/math/quadrature/sinh_sinh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+__host__ __device__ float_type func(float_type x)
+{
+    BOOST_MATH_STD_USING
+    return 1/(1+x*x);
+}
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    float_type tol = boost::math::tools::root_epsilon<float_type>();
+    float_type error;
+    float_type L1;
+    boost::math::size_t levels;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = M_PI * (static_cast<float_type>(i) / numElements);
+    }
+
+    // Launch the CUDA Kernel
+    int threadsPerBlock = 512;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    float_type tol = boost::math::tools::root_epsilon<float_type>();
+    float_type error;
+    float_type L1;
+    boost::math::quadrature::sinh_sinh<float_type> integrator;
+    for(int i = 0; i < numElements; ++i)
+    {
+        results.push_back(integrator.integrate(func, tol, &error, &L1));
+    }
+    double t = w.elapsed();
+    // check the results
+    int failed_count = 0;
+    for(int i = 0; i < numElements; ++i)
+    {
+        const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
+        if (eps > 10)
+        {
+            std::cerr << std::setprecision(std::numeric_limits<float_type>::digits10)
+                      << "Result verification failed at element " << i << "!\n"
+                      << "Device: " << output_vector[i]
+                      << "\n  Host: " << results[i]
+                      << "\n   Eps: " << eps << "\n";
+            failed_count++;
+        }
+        if (failed_count > 100)
+        {
+            break;
+        }
+    }
+
+    if (failed_count != 0)
+    {
+        std::cout << "Test FAILED" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_sinh_sinh_quad_nvrtc_double.cpp b/test/test_sinh_sinh_quad_nvrtc_double.cpp
new file mode 100644
index 0000000000..5342e97785
--- /dev/null
+++ b/test/test_sinh_sinh_quad_nvrtc_double.cpp
@@ -0,0 +1,206 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <random>
+#include <exception>
+#include <boost/math/quadrature/sinh_sinh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/quadrature/sinh_sinh.hpp>
+
+__host__ __device__ float_type func(float_type x)
+{
+    return 1/(1+x*x);
+}
+
+extern "C" __global__
+void test_sinh_sinh_kernel(const float_type*, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    float_type tol = boost::math::tools::root_epsilon<float_type>();
+    float_type error;
+    float_type L1;
+    boost::math::size_t levels;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels);
+    }
+}
+)";
+
+__host__ __device__ float_type func(float_type x)
+{
+    return 1/(1+x*x);
+}
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sinh_sinh_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_sinh_sinh_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_sinh_sinh_kernel"), "Failed to get kernel function");
+
+        int numElements = 50000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        float_type tol = boost::math::tools::root_epsilon<float_type>();
+        float_type error;
+        float_type L1;
+        boost::math::quadrature::sinh_sinh<float_type> integrator;
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = integrator.integrate(func, tol, &error, &L1);
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_sinh_sinh_quad_nvrtc_float.cpp b/test/test_sinh_sinh_quad_nvrtc_float.cpp
new file mode 100644
index 0000000000..37a8c12525
--- /dev/null
+++ b/test/test_sinh_sinh_quad_nvrtc_float.cpp
@@ -0,0 +1,206 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <random>
+#include <exception>
+#include <boost/math/quadrature/sinh_sinh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/quadrature/sinh_sinh.hpp>
+
+__host__ __device__ float_type func(float_type x)
+{
+    return 1/(1+x*x);
+}
+
+extern "C" __global__
+void test_sinh_sinh_kernel(const float_type*, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    float_type tol = boost::math::tools::root_epsilon<float_type>();
+    float_type error;
+    float_type L1;
+    boost::math::size_t levels;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels);
+    }
+}
+)";
+
+__host__ __device__ float_type func(float_type x)
+{
+    return 1/(1+x*x);
+}
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sinh_sinh_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_sinh_sinh_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_sinh_sinh_kernel"), "Failed to get kernel function"); + + int numElements = 50000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::quadrature::sinh_sinh integrator; + for (int i = 0; i < numElements; ++i) + { + auto res = integrator.integrate(func, tol, &error, &L1); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." 
<< std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_skew_normal.cpp b/test/test_skew_normal.cpp index 5f0657fbd1..617a8f6fab 100644 --- a/test/test_skew_normal.cpp +++ b/test/test_skew_normal.cpp @@ -356,7 +356,7 @@ void test_spots(RealType) BOOST_CHECK_CLOSE( // mean: mean(dist) - , static_cast(-0.579908992539856825862549L), tol10 * 2); + , static_cast(-0.5799089925398568258625490172876619L), tol10 * 2); std::cout << std::setprecision(17) << "Variance = " << variance(dist) << std::endl; BOOST_CHECK_CLOSE( // variance: N[variance[skewnormaldistribution[1.1, 2.2, -3.3]], 50] @@ -365,27 +365,27 @@ void test_spots(RealType) BOOST_CHECK_CLOSE( // skewness: skewness(dist) - , static_cast(-0.709854548171537509192897824663L), tol100); + , static_cast(-0.709854548171537509192897824663027155L), tol100); BOOST_CHECK_CLOSE( // kurtosis: kurtosis(dist) - , static_cast(3.5538752625241790601377L), tol100); + , static_cast(3.55387526252417906013770535120683805L), tol100); BOOST_CHECK_CLOSE( // kurtosis excess: kurtosis_excess(dist) - , static_cast(0.5538752625241790601377L), tol100); + , static_cast(0.553875262524179060137705351206838143L), tol100); BOOST_CHECK_CLOSE( pdf(dist, static_cast(0.4L)), - static_cast(0.294140110156599539564571L), + static_cast(0.294140110156599539564571034730246656L), tol10); BOOST_CHECK_CLOSE( cdf(dist, static_cast(0.4L)), - static_cast(0.7339186189278737976326676452L), + static_cast(0.733918618927873797632667645226588243L), tol100); BOOST_CHECK_CLOSE( quantile(dist, static_cast(0.3L)), - static_cast(-1.180104068086875314419247L), + static_cast(-1.18010406808687531441924729956233392L), tol100); @@ -395,72 +395,63 @@ void test_spots(RealType) // cout << "pdf(dist, 0) = " << pdf(dist, 0) << ", pdf(dist, 0.45) = " << pdf(dist, 0.45) << endl; // BOOST_CHECK_CLOSE(mode(dist), boost::math::constants::root_two() / 2, tol5); - BOOST_CHECK_CLOSE(mode(dist), static_cast(0.41697299497388863932L), tol100); + BOOST_CHECK_CLOSE(mode(dist), static_cast(0.416972994973888639318345129445233074L), tol100); } } - if(std::numeric_limits< RealType>::digits && (std::numeric_limits::digits < 100)) - { - dist = skew_normal_distribution(static_cast(1.1l), static_cast(0.02l), static_cast(0.03l)); + dist = skew_normal_distribution(static_cast(1.1l), static_cast(0.02l), static_cast(0.03l)); - BOOST_CHECK_CLOSE( // mean: + BOOST_CHECK_CLOSE( // mean: mean(dist) - , static_cast(1.1004785154529557886162L), tol10); - BOOST_CHECK_CLOSE( // variance: + , static_cast(1.1004785154529557886162056250600829L), tol10); + BOOST_CHECK_CLOSE( // variance: variance(dist) - , static_cast(0.00039977102296128251645L), tol10); + , static_cast(0.000399771022961282516451686289719995601L), tol10); - BOOST_CHECK_CLOSE( // skewness: + BOOST_CHECK_CLOSE( // skewness: skewness(dist) - , static_cast(5.8834811259890359782e-006L), tol100); - BOOST_CHECK_CLOSE( // kurtosis: + , static_cast(5.88348112598903597820852388986073439e-006L), tol100); + BOOST_CHECK_CLOSE( // kurtosis: kurtosis(dist) - , static_cast(3.L + 9.2903475812137800239002e-008L), tol100); - BOOST_CHECK_CLOSE( // kurtosis excess: + , static_cast(3.L + 9.290347581213780023900209941e-008L), tol100); + BOOST_CHECK_CLOSE( // kurtosis excess: kurtosis_excess(dist) - , static_cast(9.2903475812137800239002e-008L), tol100); - } - if (std::numeric_limits< RealType>::digits && (std::numeric_limits::digits < 100)) - { - dist = 
-    dist = skew_normal_distribution<RealType>(static_cast<RealType>(10.1l), static_cast<RealType>(5.l), static_cast<RealType>(-0.03l));
-    BOOST_CHECK_CLOSE( // mean:
+     , static_cast<RealType>(9.29034758121378002390020993765449518e-008L), tol100);
+  dist = skew_normal_distribution<RealType>(static_cast<RealType>(10.1l), static_cast<RealType>(5.l), static_cast<RealType>(-0.03l));
+  BOOST_CHECK_CLOSE( // mean:
      mean(dist)
-     , static_cast<RealType>(9.9803711367610528459485937L), tol10);
-    BOOST_CHECK_CLOSE( // variance:
+     , static_cast<RealType>(9.98037113676105284594859373497928476L), tol10);
+  BOOST_CHECK_CLOSE( // variance:
      variance(dist)
-     , static_cast<RealType>(24.98568893508015727823L), tol10);
+     , static_cast<RealType>(24.9856889350801572782303931074997234L), tol10);
-    BOOST_CHECK_CLOSE( // skewness:
+  BOOST_CHECK_CLOSE( // skewness:
      skewness(dist)
-     , static_cast<RealType>(-5.8834811259890359782085e-006L), tol100);
-    BOOST_CHECK_CLOSE( // kurtosis:
+     , static_cast<RealType>(-5.88348112598903597820852388986073439e-006L), tol100);
+  BOOST_CHECK_CLOSE( // kurtosis:
      kurtosis(dist)
-     , static_cast<RealType>(3.L + 9.2903475812137800239002e-008L), tol100);
-    BOOST_CHECK_CLOSE( // kurtosis excess:
+     , static_cast<RealType>(3.L + 9.290347581213780023900209941e-008L), tol100);
+  BOOST_CHECK_CLOSE( // kurtosis excess:
      kurtosis_excess(dist)
-     , static_cast<RealType>(9.2903475812137800239002e-008L), tol100);
-  }
-  if (std::numeric_limits< RealType>::digits && (std::numeric_limits<RealType>::digits < 100))
-  {
-    dist = skew_normal_distribution<RealType>(static_cast<RealType>(-10.1l), static_cast<RealType>(5.l), static_cast<RealType>(30.l));
-    BOOST_CHECK_CLOSE( // mean:
+     , static_cast<RealType>(9.29034758121378002390020993765449518e-008L), tol100);
+  dist = skew_normal_distribution<RealType>(static_cast<RealType>(-10.1l), static_cast<RealType>(5.l), static_cast<RealType>(30.l));
+  BOOST_CHECK_CLOSE( // mean:
      mean(dist)
-     , static_cast<RealType>(-6.11279169674138408531365L), 2 * tol10);
-    BOOST_CHECK_CLOSE( // variance:
+     , static_cast<RealType>(-6.11279169674138408531365149047090859L), 2 * tol10);
+  BOOST_CHECK_CLOSE( // variance:
      variance(dist)
-     , static_cast<RealType>(9.10216994642554914628242L), tol10 * 2);
+     , static_cast<RealType>(9.10216994642554914628242097277880642L), tol10 * 2);
-    BOOST_CHECK_CLOSE( // skewness:
+  BOOST_CHECK_CLOSE( // skewness:
      skewness(dist)
-     , static_cast<RealType>(0.99072425443686904424L), tol100);
-    BOOST_CHECK_CLOSE( // kurtosis:
+     , static_cast<RealType>(0.990724254436869044244695246354219556L), tol100);
+  BOOST_CHECK_CLOSE( // kurtosis:
      kurtosis(dist)
-     , static_cast<RealType>(3.L + 0.8638862008406084244563L), tol100);
-    BOOST_CHECK_CLOSE( // kurtosis excess:
+     , static_cast<RealType>(3.L + 0.8638862008406084244563090239530549L), tol100);
+  BOOST_CHECK_CLOSE( // kurtosis excess:
      kurtosis_excess(dist)
-     , static_cast<RealType>(0.8638862008406084244563L), tol100);
-  }
+     , static_cast<RealType>(0.863886200840608424456309023953054896L), tol100);
   BOOST_MATH_CHECK_THROW(cdf(skew_normal_distribution<RealType>(0, 0, 0), 0), std::domain_error);
   BOOST_MATH_CHECK_THROW(cdf(skew_normal_distribution<RealType>(0, -1, 0), 0), std::domain_error);
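The refreshed skew-normal reference values above can be sanity-checked against the closed-form mean of the distribution with location $\xi$, scale $\omega$ and shape $\alpha$:

\[
\delta = \frac{\alpha}{\sqrt{1+\alpha^{2}}}, \qquad
\operatorname{E}[X] = \xi + \omega\,\delta\,\sqrt{2/\pi}.
\]

For $(\xi, \omega, \alpha) = (1.1,\ 2.2,\ -3.3)$ this gives $\delta \approx -0.957024$ and $\operatorname{E}[X] \approx 1.1 - 2.2 \cdot 0.957024 \cdot 0.797885 \approx -0.57990899$, matching the leading digits of the new higher-precision constant.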
diff --git a/test/test_sph_bessel_double.cu b/test/test_sph_bessel_double.cu
new file mode 100644
index 0000000000..5229dd8b5e
--- /dev/null
+++ b/test/test_sph_bessel_double.cu
@@ -0,0 +1,119 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <random>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const unsigned *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::sph_bessel(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<unsigned> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    std::mt19937_64 rng {42};
+    std::uniform_int_distribution<unsigned> order(1, 100);
+    std::uniform_real_distribution<float_type> val(0, 100);
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = order(rng);
+        input_vector2[i] = val(rng);
+    }
+
+    // Launch the CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::sph_bessel(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    bool failed = false;
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(output_vector[i]) && std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 3000)
+            {
+                std::cout << "error at line: " << i
+                          << "\nParallel: " << results[i]
+                          << "\n  Serial: " << output_vector[i]
+                          << "\n    Dist: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl;
+                failed = true;
+            }
+        }
+    }
+
+    if (failed)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
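boost::math::sph_bessel computes the spherical Bessel function of the first kind, related to the cylindrical $J_\nu$ by

\[
j_n(x) = \sqrt{\frac{\pi}{2x}}\; J_{n+1/2}(x),
\]

an oscillatory function whose near-zero crossings presumably motivate the looser 3000-epsilon tolerance here, compared with the 10-epsilon gate in the sin_pi tests.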
diff --git a/test/test_sph_bessel_float.cu b/test/test_sph_bessel_float.cu
new file mode 100644
index 0000000000..bd068a1a01
--- /dev/null
+++ b/test/test_sph_bessel_float.cu
@@ -0,0 +1,119 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <random>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const unsigned *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::sph_bessel(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<unsigned> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    std::mt19937_64 rng {42};
+    std::uniform_int_distribution<unsigned> order(1, 100);
+    std::uniform_real_distribution<float_type> val(0, 100);
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = order(rng);
+        input_vector2[i] = val(rng);
+    }
+
+    // Launch the CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::sph_bessel(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    bool failed = false;
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(output_vector[i]) && std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 150)
+            {
+                std::cout << "error at line: " << i
+                          << "\nParallel: " << results[i]
+                          << "\n  Serial: " << output_vector[i]
+                          << "\n    Dist: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl;
+                failed = true;
+            }
+        }
+    }
+
+    if (failed)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_cyl_bessel_j_kernel(const unsigned *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::sph_bessel(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_bessel_j_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cyl_bessel_j_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_bessel_j_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + unsigned *h_in1, *d_in1; + float_type *h_in2, *h_out; + float_type *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new unsigned[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_int_distribution order(1, 100); + std::uniform_real_distribution val(0.0f, 100.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(order(rng)); + h_in2[i] = static_cast(val(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(unsigned)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(unsigned), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + bool failed = false; + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::sph_bessel(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 3000) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + failed = true; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + if (failed) + { + return 1; + } + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_sph_bessel_nvrtc_float.cpp b/test/test_sph_bessel_nvrtc_float.cpp new file mode 100644 index 0000000000..c9538cd5bf --- /dev/null +++ b/test/test_sph_bessel_nvrtc_float.cpp @@ -0,0 +1,199 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
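The bare #include directives in the NVRTC drivers lost their targets during extraction. Judging from the APIs actually called (cuInit/cuLaunchKernel, cudaMalloc/cudaMemcpy, nvrtc*, std::mt19937_64, std::cerr), the intended set is most plausibly the following; which three headers the "Must be included first" comment refers to is not recoverable from the diff, so treat this as an inference:

    #include <cuda.h>          // driver API: cuInit, cuModuleLoadDataEx, cuLaunchKernel
    #include <cuda_runtime.h>  // runtime API: cudaMalloc, cudaMemcpy, cudaFree
    #include <nvrtc.h>         // runtime compilation: nvrtcCreateProgram, nvrtcCompileProgram

    #include <boost/math/special_functions/bessel.hpp>              // host-side sph_bessel reference
    #include <boost/math/special_functions/relative_difference.hpp> // epsilon_difference

    #include <iostream>
    #include <random>
    #include <exception>
    #include <cstdlib>
    #include <cmath>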
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_cyl_bessel_j_kernel(const unsigned *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::sph_bessel(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_bessel_j_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cyl_bessel_j_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_bessel_j_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + unsigned *h_in1, *d_in1; + float_type *h_in2, *h_out; + float_type *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new unsigned[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_int_distribution order(1, 100); + std::uniform_real_distribution val(0.0f, 100.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(order(rng)); + h_in2[i] = static_cast(val(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(unsigned)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(unsigned), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + bool failed = false; + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::sph_bessel(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 3000) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + failed = true; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + if (failed) + { + return 1; + } + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_sph_hankel_1_double.cu b/test/test_sph_hankel_1_double.cu new file mode 100644 index 0000000000..ea9ec23063 --- /dev/null +++ b/test/test_sph_hankel_1_double.cu @@ -0,0 +1,119 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
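The sph_hankel tests that follow return complex values, and their kernels write boost::math::complex rather than std::complex: std::complex's member functions are not __device__-qualified, so Boost.Math supplies its own GPU-usable complex type for these kernels. With the extraction-stripped template arguments restored by inference, the kernel signature is presumably:

    __global__ void cuda_test(const float_type *in1, const float_type *in2,
                              boost::math::complex<float_type> *out, int numElements)
    {
        int i = blockDim.x * blockIdx.x + threadIdx.x;
        if (i < numElements)
        {
            out[i] = boost::math::sph_hankel_1(in1[i], in2[i]);
        }
    }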
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, boost::math::complex *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::sph_hankel_1(in1[i], in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed input vector B + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = rand()/(float_type)RAND_MAX; + input_vector2[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results[i] = boost::math::sph_hankel_1(input_vector1[i], input_vector2[i]); + double t = w.elapsed(); + // check the results + int failure_counter = 0; + for(int i = 0; i < numElements; ++i) + { + const auto eps = boost::math::epsilon_difference(output_vector[i].real(), results[i].real()); + if (eps > 10) + { + std::cerr << "Result verification failed at element " << i << "!\n" + << "Device: " << output_vector[i].real() << ", " << output_vector[i].imag() + << "\n Host: " << results[i].real() << ", " << results[i].imag() + << "\n Eps: " << eps << std::endl; + ++failure_counter; + if (failure_counter > 100) + { + break; + } + } + } + + if (failure_counter > 0) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_sph_hankel_1_float.cu b/test/test_sph_hankel_1_float.cu new file mode 100644 index 0000000000..4b01fe02a5 --- /dev/null +++ b/test/test_sph_hankel_1_float.cu @@ -0,0 +1,119 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, boost::math::complex *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::sph_hankel_1(in1[i], in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed input vector B + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = rand()/(float_type)RAND_MAX; + input_vector2[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results[i] = boost::math::sph_hankel_1(input_vector1[i], input_vector2[i]); + double t = w.elapsed(); + // check the results + int failure_counter = 0; + for(int i = 0; i < numElements; ++i) + { + const auto eps = boost::math::epsilon_difference(output_vector[i].real(), results[i].real()); + if (eps > 10) + { + std::cerr << "Result verification failed at element " << i << "!\n" + << "Device: " << output_vector[i].real() << ", " << output_vector[i].imag() + << "\n Host: " << results[i].real() << ", " << results[i].imag() + << "\n Eps: " << eps << std::endl; + ++failure_counter; + if (failure_counter > 100) + { + break; + } + } + } + + if (failure_counter > 0) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_sph_hankel_1_nvrtc_double.cpp b/test/test_sph_hankel_1_nvrtc_double.cpp new file mode 100644 index 0000000000..3ff1da8b20 --- /dev/null +++ b/test/test_sph_hankel_1_nvrtc_double.cpp @@ -0,0 +1,199 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
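One wrinkle in the two sph_hankel_1 .cu tests above (and in the sph_hankel_2 pair below): the host reference vector is only reserve()d and then written through results[i], which never grows the vector and is undefined behavior. A hedged sketch of the verification pass with that fixed, the ULP threshold left as a parameter since this diff uses anything from 10 to 5000 depending on the function:

    #include <vector>
    #include <boost/math/special_functions/hankel.hpp>
    #include <boost/math/special_functions/relative_difference.hpp>

    template <class T>
    int verify_sph_hankel_1(const T* in1, const T* in2,
                            const boost::math::complex<T>* device, int n, T ulp_limit)
    {
        // Sized up front (not reserve()d): indexed writes need the elements to exist.
        std::vector<boost::math::complex<T>> host(n);
        for (int i = 0; i < n; ++i)
            host[i] = boost::math::sph_hankel_1(in1[i], in2[i]);

        int failures = 0;
        for (int i = 0; i < n; ++i)
            if (boost::math::epsilon_difference(host[i].real(), device[i].real()) > ulp_limit)
                ++failures;
        return failures;
    }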
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_sph_hankel_1_kernel(const float_type *in1, const float_type* in2, boost::math::complex *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::sph_hankel_1(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sph_hankel_1_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_sph_hankel_1_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_sph_hankel_1_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2; + float_type *d_in1, *d_in2; + boost::math::complex *h_out, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new boost::math::complex[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(boost::math::complex)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(boost::math::complex), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + int fail_counter = 0; + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::sph_hankel_1(h_in1[i], h_in2[i]); + if (boost::math::epsilon_difference(res.real(), h_out[i].real()) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i].real() << ", " << h_out[i].imag() + << "\n Serial: " << res.real() << ", " << res.imag() + << "\n Dist: " << boost::math::epsilon_difference(res.real(), h_out[i].real()) << std::endl; + ++fail_counter; + if (fail_counter > 100) + { + break; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + if (fail_counter > 0) + { + return 1; + } + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_sph_hankel_1_nvrtc_float.cpp b/test/test_sph_hankel_1_nvrtc_float.cpp new file mode 100644 index 0000000000..0b07966537 --- /dev/null +++ b/test/test_sph_hankel_1_nvrtc_float.cpp @@ -0,0 +1,199 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_sph_hankel_1_kernel(const float_type *in1, const float_type* in2, boost::math::complex *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::sph_hankel_1(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sph_hankel_1_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_sph_hankel_1_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_sph_hankel_1_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2; + float_type *d_in1, *d_in2; + boost::math::complex *h_out, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new boost::math::complex[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(boost::math::complex)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(boost::math::complex), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + int fail_counter = 0; + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::sph_hankel_1(h_in1[i], h_in2[i]); + if (boost::math::epsilon_difference(res.real(), h_out[i].real()) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i].real() << ", " << h_out[i].imag() + << "\n Serial: " << res.real() << ", " << res.imag() + << "\n Dist: " << boost::math::epsilon_difference(res.real(), h_out[i].real()) << std::endl; + ++fail_counter; + if (fail_counter > 100) + { + break; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + if (fail_counter > 0) + { + return 1; + } + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_sph_hankel_2_double.cu b/test/test_sph_hankel_2_double.cu new file mode 100644 index 0000000000..6631f73a02 --- /dev/null +++ b/test/test_sph_hankel_2_double.cu @@ -0,0 +1,119 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
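All of these tests size their launches the same way, and the arithmetic deserves spelling out once: (numElements + blockSize - 1) / blockSize is a ceiling division, so the grid always covers every element, and the i < numElements guard inside each kernel masks off the slack threads in the final block. For the NVRTC drivers' 5000 elements:

    int numElements = 5000;
    int blockSize   = 256;
    int numBlocks   = (numElements + blockSize - 1) / blockSize; // (5000 + 255) / 256 == 20
    // 20 blocks * 256 threads = 5120 threads; the last 120 have
    // i >= numElements and fall through the bounds check doing no work.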
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, boost::math::complex *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::sph_hankel_2(in1[i], in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed input vector B + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = rand()/(float_type)RAND_MAX; + input_vector2[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results[i] = boost::math::sph_hankel_2(input_vector1[i], input_vector2[i]); + double t = w.elapsed(); + // check the results + int failure_counter = 0; + for(int i = 0; i < numElements; ++i) + { + const auto eps = boost::math::epsilon_difference(output_vector[i].real(), results[i].real()); + if (eps > 10) + { + std::cerr << "Result verification failed at element " << i << "!\n" + << "Device: " << output_vector[i].real() << ", " << output_vector[i].imag() + << "\n Host: " << results[i].real() << ", " << results[i].imag() + << "\n Eps: " << eps << std::endl; + ++failure_counter; + if (failure_counter > 100) + { + break; + } + } + } + + if (failure_counter > 0) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_sph_hankel_2_float.cu b/test/test_sph_hankel_2_float.cu new file mode 100644 index 0000000000..1910aef045 --- /dev/null +++ b/test/test_sph_hankel_2_float.cu @@ -0,0 +1,119 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, boost::math::complex *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::sph_hankel_2(in1[i], in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed input vector B + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = rand()/(float_type)RAND_MAX; + input_vector2[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results[i] = boost::math::sph_hankel_2(input_vector1[i], input_vector2[i]); + double t = w.elapsed(); + // check the results + int failure_counter = 0; + for(int i = 0; i < numElements; ++i) + { + const auto eps = boost::math::epsilon_difference(output_vector[i].real(), results[i].real()); + if (eps > 10) + { + std::cerr << "Result verification failed at element " << i << "!\n" + << "Device: " << output_vector[i].real() << ", " << output_vector[i].imag() + << "\n Host: " << results[i].real() << ", " << results[i].imag() + << "\n Eps: " << eps << std::endl; + ++failure_counter; + if (failure_counter > 100) + { + break; + } + } + } + + if (failure_counter > 0) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_sph_hankel_2_nvrtc_double.cpp b/test/test_sph_hankel_2_nvrtc_double.cpp new file mode 100644 index 0000000000..fa57fcbb16 --- /dev/null +++ b/test/test_sph_hankel_2_nvrtc_double.cpp @@ -0,0 +1,199 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
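A driver-API detail shared by every NVRTC test here: cuLaunchKernel receives an array of pointers to the kernel arguments, not the argument values themselves, which is why args stores the addresses of the device pointers and of numElements. Condensed from the code in these drivers:

    // Entries must line up with the NVRTC-compiled kernel's parameter list.
    void* args[] = { &d_in1, &d_in2, &d_out, &numElements };

    checkCUError(cuLaunchKernel(kernel,
                                numBlocks, 1, 1,   // grid dimensions
                                blockSize, 1, 1,   // block dimensions
                                0, nullptr,        // no dynamic shared memory, default stream
                                args, nullptr),
                 "Kernel launch failed");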
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_sph_hankel_2_kernel(const float_type *in1, const float_type* in2, boost::math::complex *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::sph_hankel_2(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sph_hankel_2_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_sph_hankel_2_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_sph_hankel_2_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2; + float_type *d_in1, *d_in2; + boost::math::complex *h_out, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new boost::math::complex[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(boost::math::complex)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(boost::math::complex), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + int fail_counter = 0; + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::sph_hankel_2(h_in1[i], h_in2[i]); + if (boost::math::epsilon_difference(res.real(), h_out[i].real()) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i].real() << ", " << h_out[i].imag() + << "\n Serial: " << res.real() << ", " << res.imag() + << "\n Dist: " << boost::math::epsilon_difference(res.real(), h_out[i].real()) << std::endl; + ++fail_counter; + if (fail_counter > 100) + { + break; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + if (fail_counter > 0) + { + return 1; + } + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_sph_hankel_2_nvrtc_float.cpp b/test/test_sph_hankel_2_nvrtc_float.cpp new file mode 100644 index 0000000000..be6fd0d097 --- /dev/null +++ b/test/test_sph_hankel_2_nvrtc_float.cpp @@ -0,0 +1,199 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_sph_hankel_2_kernel(const float_type *in1, const float_type* in2, boost::math::complex *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::sph_hankel_2(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sph_hankel_2_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_sph_hankel_2_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_sph_hankel_2_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2; + float_type *d_in1, *d_in2; + boost::math::complex *h_out, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new boost::math::complex[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(boost::math::complex)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(boost::math::complex), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + int fail_counter = 0; + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::sph_hankel_2(h_in1[i], h_in2[i]); + if (boost::math::epsilon_difference(res.real(), h_out[i].real()) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i].real() << ", " << h_out[i].imag() + << "\n Serial: " << res.real() << ", " << res.imag() + << "\n Dist: " << boost::math::epsilon_difference(res.real(), h_out[i].real()) << std::endl; + ++fail_counter; + if (fail_counter > 100) + { + break; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + if (fail_counter > 0) + { + return 1; + } + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_sph_neumann_double.cu b/test/test_sph_neumann_double.cu new file mode 100644 index 0000000000..f59dc7acca --- /dev/null +++ b/test/test_sph_neumann_double.cu @@ -0,0 +1,116 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
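The watch timer used by the .cu tests comes from the local "stopwatch.hpp", also not included in this diff. A minimal std::chrono equivalent with the same surface (reset(), and elapsed() in seconds) might look like this, purely as a guess at the helper's shape:

    // Hypothetical stand-in for the test suite's stopwatch.hpp.
    #include <chrono>

    class watch
    {
        std::chrono::steady_clock::time_point start_ = std::chrono::steady_clock::now();
    public:
        void reset() { start_ = std::chrono::steady_clock::now(); }
        double elapsed() const // seconds
        {
            return std::chrono::duration<double>(std::chrono::steady_clock::now() - start_).count();
        }
    };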
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <cmath>
+#include <cstdlib>
+#include <vector>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::sph_neumann(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::sph_neumann(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    bool failed = false;
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(output_vector[i]) && std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 5000)
+            {
+                std::cout << "error at line: " << i
+                          << "\nParallel: " << output_vector[i]
+                          << "\n  Serial: " << results[i]
+                          << "\n    Dist: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl;
+                failed = true;
+            }
+        }
+    }
+
+    if (failed)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_sph_neumann_float.cu b/test/test_sph_neumann_float.cu
new file mode 100644
index 0000000000..a295e376f6
--- /dev/null
+++ b/test/test_sph_neumann_float.cu
@@ -0,0 +1,116 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::sph_neumann(in1[i], in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed input vector B + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = rand()/(float_type)RAND_MAX; + input_vector2[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::sph_neumann(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + bool failed = false; + // check the results + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(output_vector[i]) && std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 5000) + { + std::cout << "error at line: " << i + << "\nParallel: " << results[i] + << "\n Serial: " << output_vector[i] + << "\n Dist: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl; + failed = true; + } + } + } + + if (failed) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_sph_neumann_nvrtc_double.cpp b/test/test_sph_neumann_nvrtc_double.cpp new file mode 100644 index 0000000000..61dcb07ddc --- /dev/null +++ b/test/test_sph_neumann_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
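Worth noting about the launch-checking idiom the .cu tests share: a <<<...>>> launch is asynchronous and reports nothing by itself, so the tests first call cudaDeviceSynchronize() (which also makes the stopwatch reading meaningful) and only then ask cudaGetLastError() for the verdict:

    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(in1, in2, out, numElements);
    cudaDeviceSynchronize();              // wait for completion; fences the timer
    cudaError_t err = cudaGetLastError(); // reports launch/execution failures
    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch CUDA kernel (error code "
                  << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }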
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_sph_neumann_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::sph_neumann(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sph_neumann_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_sph_neumann_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_sph_neumann_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); 
+ h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::sph_neumann(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_sph_neumann_nvrtc_float.cpp b/test/test_sph_neumann_nvrtc_float.cpp new file mode 100644 index 0000000000..5d7ae59fee --- /dev/null +++ b/test/test_sph_neumann_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
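Unlike the sph_bessel NVRTC drivers, this sph_neumann driver (and, from the shared boilerplate, apparently its float twin below) prints mismatches but never records them, so it exits 0 even when verification fails. A sketch of the loop with the same failed flag the other drivers use:

    bool failed = false;
    for (int i = 0; i < numElements; ++i)
    {
        const auto res = boost::math::sph_neumann(h_in1[i], h_in2[i]);
        if (std::isfinite(res) && boost::math::epsilon_difference(res, h_out[i]) > 300)
        {
            std::cout << "error at line: " << i
                      << "\nParallel: " << h_out[i]
                      << "\n  Serial: " << res
                      << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
            failed = true;
        }
    }
    // ...after freeing the device and host buffers:
    if (failed)
    {
        return 1;
    }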
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_sph_neumann_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::sph_neumann(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sph_neumann_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_sph_neumann_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_sph_neumann_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::sph_neumann(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_students_t.cpp b/test/test_students_t.cpp
index ad4dc4187b..b6aec11f44 100644
--- a/test/test_students_t.cpp
+++ b/test/test_students_t.cpp
@@ -18,13 +18,19 @@
 #  pragma warning (disable :4127) // conditional expression is constant.
 #endif
 
+#include <boost/math/tools/config.hpp>
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // Boost.Test
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/math/tools/traits.hpp> // for has_denorm_now
+#include "../include_private/boost/math/tools/test.hpp"
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
-#include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #include "test_out_of_range.hpp"
 #include <boost/math/distributions/students_t.hpp>
 using boost::math::students_t_distribution;
@@ -35,6 +41,7 @@ using std::setprecision;
 #include <limits>
 using std::numeric_limits;
+#include <type_traits>
 
 template <class RealType>
 RealType naive_pdf(RealType v, RealType t)
@@ -528,7 +535,10 @@ void test_spots(RealType)
   std::string type = typeid(RealType).name();
//   if (type != "class boost::math::concepts::real_concept") fails for gcc
-  if (typeid(RealType) != typeid(boost::math::concepts::real_concept))
+
+  #ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
+  BOOST_MATH_IF_CONSTEXPR(!std::is_same<RealType, boost::math::concepts::real_concept>::value)
+  #endif
   { // Ordinary floats only.
     RealType limit = 1/ boost::math::tools::epsilon<RealType>(); // Default policy to get full accuracy.
diff --git a/test/test_students_t_cdf_double.cu b/test/test_students_t_cdf_double.cu
new file mode 100644
index 0000000000..e8f47faa25
--- /dev/null
+++ b/test/test_students_t_cdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::students_t_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/test/test_students_t_cdf_float.cu b/test/test_students_t_cdf_float.cu
new file mode 100644
index 0000000000..22fd5d7c01
--- /dev/null
+++ b/test/test_students_t_cdf_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::students_t_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/test/test_students_t_cdf_nvrtc_double.cpp b/test/test_students_t_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..c88bdd18d5
--- /dev/null
+++ b/test/test_students_t_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_students_t_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_students_t_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_students_t_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_students_t_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::students_t_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_students_t_cdf_nvrtc_float.cpp b/test/test_students_t_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..b5c1b37229
--- /dev/null
+++ b/test/test_students_t_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_students_t_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_students_t_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_students_t_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_students_t_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::students_t_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_students_t_pdf_double.cu b/test/test_students_t_pdf_double.cu
new file mode 100644
index 0000000000..187f63ff52
--- /dev/null
+++ b/test/test_students_t_pdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::students_t_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/test/test_students_t_pdf_float.cu b/test/test_students_t_pdf_float.cu
new file mode 100644
index 0000000000..ba0469b0e3
--- /dev/null
+++ b/test/test_students_t_pdf_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::students_t_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/test/test_students_t_pdf_nvrtc_double.cpp b/test/test_students_t_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..e67eb382a0
--- /dev/null
+++ b/test/test_students_t_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_students_t_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_students_t_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_students_t_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_students_t_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::students_t_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_students_t_pdf_nvrtc_float.cpp b/test/test_students_t_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..e0c9556840
--- /dev/null
+++ b/test/test_students_t_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_students_t_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_students_t_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_students_t_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_students_t_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::students_t_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_students_t_quan_double.cu b/test/test_students_t_quan_double.cu
new file mode 100644
index 0000000000..fe6d999528
--- /dev/null
+++ b/test/test_students_t_quan_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::students_t_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/test/test_students_t_quan_float.cu b/test/test_students_t_quan_float.cu
new file mode 100644
index 0000000000..6293ec3f83
--- /dev/null
+++ b/test/test_students_t_quan_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::students_t_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/test/test_students_t_quan_nvrtc_double.cpp b/test/test_students_t_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..e5b5f60f38
--- /dev/null
+++ b/test/test_students_t_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_students_t_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_students_t_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_students_t_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_students_t_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::students_t_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_students_t_quan_nvrtc_float.cpp b/test/test_students_t_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..db41964a93
--- /dev/null
+++ b/test/test_students_t_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_students_t_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::students_t_distribution(1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_students_t_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_students_t_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_students_t_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::students_t_distribution(1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_tgamma_double.cu b/test/test_tgamma_double.cu new file mode 100644 index 0000000000..6e4140ab6e --- /dev/null +++ b/test/test_tgamma_double.cu @@ -0,0 +1,102 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
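A note on the `> 300` check used by the verification loops in these NVRTC tests: boost::math::epsilon_difference reports the distance between two values in multiples of machine epsilon, so the loop accepts device results within a few hundred eps of the serial Boost.Math result. A minimal host-only sketch of the same comparison (the inputs and the 300-eps bound here are illustrative, not taken from the tests above):

    #include <boost/math/distributions/students_t.hpp>
    #include <boost/math/special_functions/relative_difference.hpp>
    #include <iostream>

    int main()
    {
        using boost::math::students_t_distribution;
        const float p = 0.25f;
        // float result versus a higher-precision double reference
        const float  q     = quantile(students_t_distribution<float>(1), p);
        const double q_ref = quantile(students_t_distribution<double>(1), static_cast<double>(p));
        const float diff = boost::math::epsilon_difference(q, static_cast<float>(q_ref));
        std::cout << diff << " eps " << (diff > 300 ? "(reject)" : "(accept)") << '\n';
    }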
diff --git a/test/test_tgamma_double.cu b/test/test_tgamma_double.cu
new file mode 100644
index 0000000000..6e4140ab6e
--- /dev/null
+++ b/test/test_tgamma_double.cu
@@ -0,0 +1,102 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::tgamma(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::tgamma(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
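These runtime-API tests all allocate through "cuda_managed_ptr.hpp", which is not part of this diff. For readers following along, a minimal sketch of what such a wrapper plausibly provides, assuming cudaMallocManaged-backed storage (this is a hypothetical stand-in, not the repository's actual header):

    #include <cuda_runtime.h>
    #include <cstddef>

    // Hypothetical RAII wrapper over CUDA unified memory: the same pointer
    // is dereferenceable on the host and passable to kernels via get().
    template <typename T>
    class managed_array_sketch
    {
        T* ptr_ = nullptr;
    public:
        explicit managed_array_sketch(std::size_t n)
        {
            cudaMallocManaged(&ptr_, n * sizeof(T));
        }
        ~managed_array_sketch() { cudaFree(ptr_); }
        managed_array_sketch(const managed_array_sketch&) = delete;
        managed_array_sketch& operator=(const managed_array_sketch&) = delete;
        T* get() const { return ptr_; }
        T& operator[](std::size_t i) { return ptr_[i]; }
        const T& operator[](std::size_t i) const { return ptr_[i]; }
    };

Unified memory is what lets the tests fill input_vector and read output_vector directly on the host without explicit cudaMemcpy calls.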
diff --git a/test/test_tgamma_float.cu b/test/test_tgamma_float.cu
new file mode 100644
index 0000000000..cb2d01482d
--- /dev/null
+++ b/test/test_tgamma_float.cu
@@ -0,0 +1,102 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::tgamma(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::tgamma(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_tgamma_ratio_double.cu b/test/test_tgamma_ratio_double.cu
new file mode 100644
index 0000000000..059e1c3c67
--- /dev/null
+++ b/test/test_tgamma_ratio_double.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::tgamma_ratio(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::tgamma_ratio(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
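Why a dedicated tgamma_ratio(a, b) rather than tgamma(a)/tgamma(b): the intermediate gamma values overflow long before the ratio itself does. A short host-only illustration (the particular arguments are chosen here just to trigger the overflow; they are not from the test above):

    #include <boost/math/special_functions/gamma.hpp>
    #include <cmath>
    #include <iostream>

    int main()
    {
        // tgamma(180.0) overflows a double (~1.8e308 max), but the
        // ratio Gamma(180)/Gamma(178.5) is a perfectly ordinary number.
        const double a = 180.0, b = 178.5;
        const double via_logs = std::exp(std::lgamma(a) - std::lgamma(b));
        const double direct   = boost::math::tgamma_ratio(a, b);
        std::cout << via_logs << " vs " << direct << '\n';
    }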
diff --git a/test/test_tgamma_ratio_float.cu b/test/test_tgamma_ratio_float.cu
new file mode 100644
index 0000000000..dc669bd7fb
--- /dev/null
+++ b/test/test_tgamma_ratio_float.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::tgamma_ratio(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::tgamma_ratio(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_tgamma_ratio_nvrtc_double.cpp b/test/test_tgamma_ratio_nvrtc_double.cpp
new file mode 100644
index 0000000000..5b0c3b1e67
--- /dev/null
+++ b/test/test_tgamma_ratio_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/gamma.hpp>
+extern "C" __global__
+void test_tgamma_ratio_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::tgamma_ratio(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_tgamma_ratio_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_tgamma_ratio_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_tgamma_ratio_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::tgamma_ratio(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
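All of the *_nvrtc_* tests in this diff repeat the same compile-to-PTX, load, launch sequence. Distilled into one helper it looks roughly like the sketch below; the function name is hypothetical and error handling is elided for brevity, so treat it as a shape, not a drop-in replacement for the tests' explicit checks:

    #include <nvrtc.h>
    #include <cuda.h>
    #include <string>
    #include <vector>

    // Compile CUDA C++ source to PTX at runtime with NVRTC. The returned
    // string is then fed to cuModuleLoadDataEx + cuModuleGetFunction.
    std::string compile_to_ptx(const char* src, const char* name,
                               const std::vector<const char*>& opts)
    {
        nvrtcProgram prog;
        nvrtcCreateProgram(&prog, src, name, 0, nullptr, nullptr);
        nvrtcCompileProgram(prog, static_cast<int>(opts.size()), opts.data());
        size_t n = 0;
        nvrtcGetPTXSize(prog, &n);
        std::string ptx(n, '\0');
        nvrtcGetPTX(prog, &ptx[0]);
        nvrtcDestroyProgram(&prog);
        return ptx;
    }

The driver-API half then mirrors the tests above: cuInit, cuCtxCreate, cuModuleLoadDataEx on the PTX, cuModuleGetFunction by the extern "C" kernel name, and cuLaunchKernel with a void* argument array.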
diff --git a/test/test_tgamma_ratio_nvrtc_float.cpp b/test/test_tgamma_ratio_nvrtc_float.cpp
new file mode 100644
index 0000000000..ab1bf339b4
--- /dev/null
+++ b/test/test_tgamma_ratio_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/gamma.hpp>
+extern "C" __global__
+void test_tgamma_ratio_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::tgamma_ratio(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_tgamma_ratio_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_tgamma_ratio_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_tgamma_ratio_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::tgamma_ratio(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_triangular.cpp b/test/test_triangular.cpp
index 8efb17d85c..d8f37b8520 100644
--- a/test/test_triangular.cpp
+++ b/test/test_triangular.cpp
@@ -8,21 +8,28 @@
 // test_triangular.cpp
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch.hpp>
+#endif
 
 #ifdef _MSC_VER
 #  pragma warning(disable: 4127) // conditional expression is constant.
 #  pragma warning(disable: 4305) // truncation from 'long double' to 'float'
 #endif
 
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // Boost.Test
 #include <boost/test/tools/floating_point_comparison.hpp>
 
 #include <boost/math/distributions/triangular.hpp>
 using boost::math::triangular_distribution;
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include <boost/math/constants/constants.hpp>
 #include "test_out_of_range.hpp"
@@ -463,8 +470,11 @@ void test_spots(RealType)
   BOOST_CHECK_CLOSE_FRACTION(
     mode(tridef), static_cast<RealType>(0), tolerance);
   // skewness:
+  // On device the result does not get flushed exactly to zero so the eps difference is by default huge
+  #ifndef BOOST_MATH_HAS_GPU_SUPPORT
   BOOST_CHECK_CLOSE_FRACTION(
     median(tridef), static_cast<RealType>(0), tolerance);
+  #endif
   // https://reference.wolfram.com/language/ref/Skewness.html  skewness{-1, 0, +1} = 0
   // skewness[triangulardistribution{-1, 0, +1}] does not compute a result.
   // skewness[triangulardistribution{0, +1}] result == 0
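The test_triangular.cpp hunks above show the guard pattern this diff applies across the ported host tests: host-only machinery is fenced off from device builds, and checks that depend on exact flush-to-zero behaviour are skipped when GPU support is enabled. A condensed sketch of the pattern (macro names are the ones used in the patch):

    // Keep the precompiled header out of SYCL device compilation.
    #ifndef SYCL_LANGUAGE_VERSION
    #include <pch.hpp>
    #endif

    // Skip checks whose expected value is an exact zero: on device the
    // result is merely tiny, so an eps-based comparison would explode.
    #ifndef BOOST_MATH_HAS_GPU_SUPPORT
    // BOOST_CHECK_CLOSE_FRACTION(median(tridef), RealType(0), tolerance);
    #endif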
diff --git a/test/test_triangular_cdf_double.cu b/test/test_triangular_cdf_double.cu
new file mode 100644
index 0000000000..38affb91bd
--- /dev/null
+++ b/test/test_triangular_cdf_double.cu
@@ -0,0 +1,113 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <boost/math/distributions/triangular.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <cmath>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::triangular_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(cdf(boost::math::triangular_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
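For reference while reading the triangular CDF kernels: the default triangular_distribution has lower = -1, mode = 0, upper = 1, and on the upper half of the support the CDF has the closed form cdf(x) = 1 - (1 - x)^2 / 2. A tiny host-only spot check of the value the kernel computes (the sample point 0.25 is arbitrary):

    #include <boost/math/distributions/triangular.hpp>
    #include <iostream>

    int main()
    {
        boost::math::triangular_distribution<double> tri; // (-1, 0, 1)
        const double x = 0.25;
        const double closed_form = 1.0 - (1.0 - x) * (1.0 - x) / 2.0; // 0.71875
        std::cout << cdf(tri, x) << " vs " << closed_form << '\n';
    }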
diff --git a/test/test_triangular_cdf_float.cu b/test/test_triangular_cdf_float.cu
new file mode 100644
index 0000000000..c1bb22bd3f
--- /dev/null
+++ b/test/test_triangular_cdf_float.cu
@@ -0,0 +1,113 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <boost/math/distributions/triangular.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <cmath>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::triangular_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(cdf(boost::math::triangular_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_triangular_cdf_nvrtc_double.cpp b/test/test_triangular_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..f23009d92d
--- /dev/null
+++ b/test/test_triangular_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/triangular.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/triangular.hpp>
+extern "C" __global__
+void test_triangular_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::triangular_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_triangular_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_triangular_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_triangular_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::triangular_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_triangular_cdf_nvrtc_float.cpp b/test/test_triangular_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..e17f5c8146
--- /dev/null
+++ b/test/test_triangular_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/triangular.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/triangular.hpp>
+extern "C" __global__
+void test_triangular_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::triangular_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_triangular_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_triangular_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_triangular_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::triangular_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_triangular_pdf_double.cu b/test/test_triangular_pdf_double.cu
new file mode 100644
index 0000000000..38050faff8
--- /dev/null
+++ b/test/test_triangular_pdf_double.cu
@@ -0,0 +1,113 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <boost/math/distributions/triangular.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <cmath>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::triangular_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(pdf(boost::math::triangular_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
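The PDF variant has an even simpler closed form to check against: on the default support [-1, 1] with mode 0, pdf(x) = 1 - |x|. A companion host-only spot check (again with an arbitrary sample point):

    #include <boost/math/distributions/triangular.hpp>
    #include <cmath>
    #include <iostream>

    int main()
    {
        boost::math::triangular_distribution<double> tri; // (-1, 0, 1)
        const double x = 0.25;
        std::cout << pdf(tri, x) << " vs " << 1.0 - std::fabs(x) << '\n'; // both 0.75
    }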
diff --git a/test/test_triangular_pdf_float.cu b/test/test_triangular_pdf_float.cu
new file mode 100644
index 0000000000..82e1be5fcc
--- /dev/null
+++ b/test/test_triangular_pdf_float.cu
@@ -0,0 +1,113 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <boost/math/distributions/triangular.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <cmath>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::triangular_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(pdf(boost::math::triangular_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_triangular_pdf_nvrtc_double.cpp b/test/test_triangular_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..4fd23a51dc
--- /dev/null
+++ b/test/test_triangular_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/triangular.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/triangular.hpp>
+extern "C" __global__
+void test_triangular_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::triangular_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_triangular_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_triangular_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_triangular_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::triangular_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <cuda.h> +#include <cuda_runtime.h> +#include <nvrtc.h> + +#include <boost/math/distributions/triangular.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <cmath> +#include <cstdint> +#include <iostream> +#include <iomanip> +#include <vector> +#include <random> + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include <boost/math/distributions/triangular.hpp> +#include <cuda/std/type_traits> +extern "C" __global__ +void test_triangular_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::triangular_distribution<float_type>(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_triangular_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_triangular_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_triangular_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast<float_type>(dist(rng)); + h_in2[i] = static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::triangular_distribution<float_type>(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_triangular_quan_double.cu b/test/test_triangular_quan_double.cu new file mode 100644 index 0000000000..5751ead020 --- /dev/null +++ b/test/test_triangular_quan_double.cu @@ -0,0 +1,113 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include <iostream> +#include <iomanip> +#include <vector> +#include <boost/math/distributions/triangular.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/random/mersenne_twister.hpp> +#include <boost/random/uniform_real_distribution.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::triangular_distribution<float_type>(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution<float_type> dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::triangular_distribution<float_type>(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_triangular_quan_float.cu b/test/test_triangular_quan_float.cu new file mode 100644 index 0000000000..579e10fd54 --- /dev/null +++ b/test/test_triangular_quan_float.cu @@ -0,0 +1,113 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include <iostream> +#include <iomanip> +#include <vector> +#include <boost/math/distributions/triangular.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/random/mersenne_twister.hpp> +#include <boost/random/uniform_real_distribution.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::triangular_distribution<float_type>(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution<float_type> dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::triangular_distribution<float_type>(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_triangular_quan_nvrtc_double.cpp b/test/test_triangular_quan_nvrtc_double.cpp new file mode 100644 index 0000000000..c41b3a11bc --- /dev/null +++ b/test/test_triangular_quan_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <cuda.h> +#include <cuda_runtime.h> +#include <nvrtc.h> + +#include <boost/math/distributions/triangular.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <cmath> +#include <cstdint> +#include <iostream> +#include <iomanip> +#include <vector> +#include <random> + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include <boost/math/distributions/triangular.hpp> +#include <cuda/std/type_traits> +extern "C" __global__ +void test_triangular_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::triangular_distribution<float_type>(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_triangular_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_triangular_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_triangular_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast<float_type>(dist(rng)); + h_in2[i] = static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::triangular_distribution<float_type>(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_triangular_quan_nvrtc_float.cpp b/test/test_triangular_quan_nvrtc_float.cpp new file mode 100644 index 0000000000..256a7f2d28 --- /dev/null +++ b/test/test_triangular_quan_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <cuda.h> +#include <cuda_runtime.h> +#include <nvrtc.h> + +#include <boost/math/distributions/triangular.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <cmath> +#include <cstdint> +#include <iostream> +#include <iomanip> +#include <vector> +#include <random> + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include <boost/math/distributions/triangular.hpp> +#include <cuda/std/type_traits> +extern "C" __global__ +void test_triangular_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::triangular_distribution<float_type>(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_triangular_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_triangular_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_triangular_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast<float_type>(dist(rng)); + h_in2[i] = static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::triangular_distribution<float_type>(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_trigamma.cpp b/test/test_trigamma.cpp index dd89898d76..85ba8078af 100644 --- a/test/test_trigamma.cpp +++ b/test/test_trigamma.cpp @@ -1,9 +1,13 @@ // (C) Copyright John Maddock 2014. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef SYCL_LANGUAGE_VERSION #include <pch_light.hpp> +#endif + #include "test_trigamma.hpp" void expected_results() diff --git a/test/test_trigamma.hpp b/test/test_trigamma.hpp index 94a1290327..49b1bd5501 100644 --- a/test/test_trigamma.hpp +++ b/test/test_trigamma.hpp @@ -1,4 +1,5 @@ -// Copyright John Maddock 2014 +// Copyright John Maddock 2014 +// Copyright Matt Borland 2024 // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -9,9 +10,10 @@ #include <boost/math/concepts/real_concept.hpp> #define BOOST_TEST_MAIN #include <boost/test/unit_test.hpp> +#include <boost/math/special_functions/trigamma.hpp> +#include "../include_private/boost/math/tools/test.hpp" #include <boost/test/tools/floating_point_comparison.hpp> #include <boost/math/tools/stats.hpp> -#include <boost/math/tools/test.hpp> #include <boost/math/constants/constants.hpp> #include <boost/math/tools/big_constant.hpp> #include <boost/lexical_cast.hpp> diff --git a/test/test_trigamma_double.cu b/test/test_trigamma_double.cu new file mode 100644 index 0000000000..6780e3e924 --- /dev/null +++ b/test/test_trigamma_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include <iostream> +#include <vector> +#include <boost/math/special_functions/trigamma.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::trigamma(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::trigamma(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_trigamma_float.cu b/test/test_trigamma_float.cu new file mode 100644 index 0000000000..a407a0eb18 --- /dev/null +++ b/test/test_trigamma_float.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include <iostream> +#include <vector> +#include <boost/math/special_functions/trigamma.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::trigamma(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::trigamma(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_trigamma_nvrtc_double.cpp b/test/test_trigamma_nvrtc_double.cpp new file mode 100644 index 0000000000..46877acce1 --- /dev/null +++ b/test/test_trigamma_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <cuda.h> +#include <cuda_runtime.h> +#include <nvrtc.h> + +#include <boost/math/special_functions/trigamma.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <cmath> +#include <iostream> +#include <iomanip> +#include <vector> +#include <random> + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include <boost/math/special_functions/trigamma.hpp> +#include <cuda/std/type_traits> +extern "C" __global__ +void test_trigamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::trigamma(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_trigamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_trigamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_trigamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); + h_in2[i] = 
static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::trigamma(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_trigamma_nvrtc_float.cpp b/test/test_trigamma_nvrtc_float.cpp new file mode 100644 index 0000000000..083c7d8767 --- /dev/null +++ b/test/test_trigamma_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <cuda.h> +#include <cuda_runtime.h> +#include <nvrtc.h> + +#include <boost/math/special_functions/trigamma.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <cmath> +#include <iostream> +#include <iomanip> +#include <vector> +#include <random> + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include <boost/math/special_functions/trigamma.hpp> +#include <cuda/std/type_traits> +extern "C" __global__ +void test_trigamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::trigamma(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_trigamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_trigamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_trigamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); + h_in2[i] = 
static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::trigamma(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_trunc_double.cu b/test/test_trunc_double.cu new file mode 100644 index 0000000000..5a2d7b622b --- /dev/null +++ b/test/test_trunc_double.cu @@ -0,0 +1,97 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include <iostream> +#include <iomanip> +#include <vector> +#include <boost/math/special_functions/trunc.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::trunc(in[i]) + boost::math::itrunc(in[i]) + boost::math::ltrunc(in[i]) + boost::math::lltrunc(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> h_A(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> h_C(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + h_A[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(4 * boost::math::trunc(h_A[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(h_C[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_trunc_float.cu b/test/test_trunc_float.cu new file mode 100644 index 0000000000..d6fe4d3525 --- /dev/null +++ b/test/test_trunc_float.cu @@ -0,0 +1,97 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include <iostream> +#include <iomanip> +#include <vector> +#include <boost/math/special_functions/trunc.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::trunc(in[i]) + boost::math::itrunc(in[i]) + boost::math::ltrunc(in[i]) + boost::math::lltrunc(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> h_A(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> h_C(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + h_A[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(4 * boost::math::trunc(h_A[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(h_C[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_trunc_nvrtc_double.cpp b/test/test_trunc_nvrtc_double.cpp new file mode 100644 index 0000000000..1aab64887b --- /dev/null +++ b/test/test_trunc_nvrtc_double.cpp @@ -0,0 +1,196 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <cuda.h> +#include <cuda_runtime.h> +#include <nvrtc.h> + +#include <boost/math/special_functions/trunc.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <cmath> +#include <iostream> +#include <iomanip> +#include <vector> +#include <random> + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include <boost/math/special_functions/trunc.hpp> +#include <cuda/std/type_traits> +extern "C" __global__ +void test_trunc_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::trunc(in1[i]) + + boost::math::itrunc(in1[i]) + + boost::math::ltrunc(in1[i]) + + boost::math::lltrunc(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_trunc_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_trunc_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_trunc_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); + h_in2[i] = static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::trunc(h_in1[i]) + + boost::math::itrunc(h_in1[i]) + + boost::math::ltrunc(h_in1[i]) + + boost::math::lltrunc(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_trunc_nvrtc_float.cpp b/test/test_trunc_nvrtc_float.cpp new file mode 100644 index 0000000000..13ad4bc51b --- /dev/null +++ b/test/test_trunc_nvrtc_float.cpp @@ -0,0 +1,196 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <cuda.h> +#include <cuda_runtime.h> +#include <nvrtc.h> + +#include <boost/math/special_functions/trunc.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <cmath> +#include <iostream> +#include <iomanip> +#include <vector> +#include <random> + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include <boost/math/special_functions/trunc.hpp> +#include <cuda/std/type_traits> +extern "C" __global__ +void test_trunc_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::trunc(in1[i]) + + boost::math::itrunc(in1[i]) + + boost::math::ltrunc(in1[i]) + + boost::math::lltrunc(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_trunc_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_trunc_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_trunc_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f); + for (int i = 0; i < 
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::trunc(h_in1[i])
+                             + boost::math::itrunc(h_in1[i])
+                             + boost::math::ltrunc(h_in1[i])
+                             + boost::math::lltrunc(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_uniform.cpp b/test/test_uniform.cpp
index 65d192a4d6..4e034cab74 100644
--- a/test/test_uniform.cpp
+++ b/test/test_uniform.cpp
@@ -8,21 +8,28 @@
 // test_uniform.cpp
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch.hpp>
+#endif
 #ifdef _MSC_VER
 # pragma warning(disable: 4127) // conditional expression is constant.
 # pragma warning(disable: 4100) // unreferenced formal parameter.
 #endif
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // Boost.Test
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/math/distributions/uniform.hpp>
 using boost::math::uniform_distribution;
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include "test_out_of_range.hpp"
 #include <iostream>
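The uniform-distribution tests that follow all verify the device result against a host-side evaluation of the same Boost.Math call. On [a, b] the quantities involved have simple closed forms: pdf(x) = 1/(b - a) on the support, cdf(x) = (x - a)/(b - a) clamped to [0, 1], and quantile(p) = a + p(b - a). A standalone host-only check of those identities (a sketch, independent of the CUDA tests):

    #include <boost/math/distributions/uniform.hpp>
    #include <cassert>
    #include <cmath>

    int main()
    {
        boost::math::uniform_distribution<double> u(0.0, 1.0); // a = 0, b = 1
        assert(std::abs(pdf(u, 0.25) - 1.0) < 1e-15);          // 1 / (b - a)
        assert(std::abs(cdf(u, 0.25) - 0.25) < 1e-15);         // (x - a) / (b - a)
        assert(std::abs(quantile(u, 0.25) - 0.25) < 1e-15);    // a + p * (b - a)
    }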
diff --git a/test/test_uniform_cdf_double.cu b/test/test_uniform_cdf_double.cu
new file mode 100644
index 0000000000..beb98c34dd
--- /dev/null
+++ b/test/test_uniform_cdf_double.cu
@@ -0,0 +1,113 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <boost/math/distributions/uniform.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <cmath>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::uniform_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::uniform_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
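cuda_managed_ptr.hpp and stopwatch.hpp are pre-existing test helpers and are not part of this diff. The tests rely on only two properties of cuda_managed_ptr: the allocation is reachable from both host and device, and .get() exposes the raw pointer for the kernel launch. A minimal sketch of such a wrapper built on cudaMallocManaged (hypothetical; the real header may differ):

    #include <cuda_runtime.h>
    #include <cstddef>

    template <class T>
    class cuda_managed_ptr
    {
        T* ptr_ = nullptr;
    public:
        // Unified memory: the same pointer is valid on host and device.
        explicit cuda_managed_ptr(std::size_t n) { cudaMallocManaged(&ptr_, n * sizeof(T)); }
        ~cuda_managed_ptr() { cudaFree(ptr_); }
        cuda_managed_ptr(const cuda_managed_ptr&) = delete;
        cuda_managed_ptr& operator=(const cuda_managed_ptr&) = delete;
        T* get() const { return ptr_; }
        T& operator[](std::size_t i) const { return ptr_[i]; }
    };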
diff --git a/test/test_uniform_cdf_float.cu b/test/test_uniform_cdf_float.cu
new file mode 100644
index 0000000000..7aef4a6be1
--- /dev/null
+++ b/test/test_uniform_cdf_float.cu
@@ -0,0 +1,113 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <boost/math/distributions/uniform.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <cmath>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::uniform_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::uniform_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_uniform_cdf_nvrtc_double.cpp b/test/test_uniform_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..1b6b563a05
--- /dev/null
+++ b/test/test_uniform_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_uniform_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::uniform_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_uniform_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_uniform_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_uniform_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::uniform_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_uniform_cdf_nvrtc_float.cpp b/test/test_uniform_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..6ba98900f9 --- /dev/null +++ b/test/test_uniform_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_uniform_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::uniform_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_uniform_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_uniform_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_uniform_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::uniform_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_uniform_pdf_double.cu b/test/test_uniform_pdf_double.cu new file mode 100644 index 0000000000..6b1cf83e0d --- /dev/null +++ b/test/test_uniform_pdf_double.cu @@ -0,0 +1,113 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::uniform_distribution(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::uniform_distribution(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_uniform_pdf_float.cu b/test/test_uniform_pdf_float.cu new file mode 100644 index 0000000000..4b003d22a1 --- /dev/null +++ b/test/test_uniform_pdf_float.cu @@ -0,0 +1,113 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::uniform_distribution(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::uniform_distribution(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_uniform_pdf_nvrtc_double.cpp b/test/test_uniform_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..f638c43961 --- /dev/null +++ b/test/test_uniform_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/uniform.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include
+#include <boost/math/distributions/uniform.hpp>
+extern "C" __global__
+void test_uniform_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::uniform_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_uniform_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_uniform_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_uniform_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::uniform_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
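Every *_nvrtc_* test in this diff repeats the same driver-API sequence: create a program from the kernel source string, compile it, fetch the PTX, load the PTX as a module, and look up the extern "C" entry point. Condensed into one helper it looks roughly like this (a sketch with error checking elided; the tests above check every return code instead):

    #include <cuda.h>
    #include <nvrtc.h>
    #include <vector>

    CUfunction load_kernel(const char* src, const char* name, const char** opts, int numOpts)
    {
        // 1-2: create and compile the runtime program.
        nvrtcProgram prog;
        nvrtcCreateProgram(&prog, src, "kernel.cu", 0, nullptr, nullptr);
        nvrtcCompileProgram(prog, numOpts, opts);

        // 3: extract the generated PTX.
        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        std::vector<char> ptx(ptx_size);
        nvrtcGetPTX(prog, ptx.data());
        nvrtcDestroyProgram(&prog);

        // 4-5: load the PTX and resolve the kernel entry point.
        CUmodule module;
        CUfunction kernel;
        cuModuleLoadDataEx(&module, ptx.data(), 0, nullptr, nullptr);
        cuModuleGetFunction(&kernel, module, name);
        return kernel;
    }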
diff --git a/test/test_uniform_pdf_nvrtc_float.cpp b/test/test_uniform_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..81da44417a
--- /dev/null
+++ b/test/test_uniform_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_uniform_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::uniform_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_uniform_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_uniform_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_uniform_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::uniform_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_uniform_quan_double.cu b/test/test_uniform_quan_double.cu new file mode 100644 index 0000000000..ab11374754 --- /dev/null +++ b/test/test_uniform_quan_double.cu @@ -0,0 +1,113 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::uniform_distribution(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::uniform_distribution(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_uniform_quan_float.cu b/test/test_uniform_quan_float.cu new file mode 100644 index 0000000000..7a7e4ccf50 --- /dev/null +++ b/test/test_uniform_quan_float.cu @@ -0,0 +1,113 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::uniform_distribution(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::uniform_distribution(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_uniform_quan_nvrtc_double.cpp b/test/test_uniform_quan_nvrtc_double.cpp new file mode 100644 index 0000000000..13b8c6d230 --- /dev/null +++ b/test/test_uniform_quan_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/uniform.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include
+#include <boost/math/distributions/uniform.hpp>
+extern "C" __global__
+void test_uniform_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::uniform_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_uniform_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_uniform_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_uniform_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::uniform_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
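For the default uniform distribution on [0, 1] the quantile function is the exact inverse of the CDF, so the two kernels exercised in this diff can also be cross-checked against each other on the host. A sketch using the same epsilon_difference tolerance style as the tests:

    #include <boost/math/distributions/uniform.hpp>
    #include <boost/math/special_functions/relative_difference.hpp>
    #include <iostream>

    int main()
    {
        boost::math::uniform_distribution<double> u; // [0, 1]
        for (double p = 0.05; p < 1.0; p += 0.05)
        {
            // quantile is the inverse CDF, so the round trip should be exact to rounding.
            const double rt = cdf(u, quantile(u, p));
            if (boost::math::epsilon_difference(rt, p) > 2)
                std::cout << "round-trip failure at p = " << p << '\n';
        }
    }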
diff --git a/test/test_uniform_quan_nvrtc_float.cpp b/test/test_uniform_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..57372dee3f
--- /dev/null
+++ b/test/test_uniform_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_uniform_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::uniform_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_uniform_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_uniform_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_uniform_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::uniform_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_weibull.cpp b/test/test_weibull.cpp
index 4b31a7f0b7..dc509b742b 100644
--- a/test/test_weibull.cpp
+++ b/test/test_weibull.cpp
@@ -12,15 +12,17 @@
 # pragma warning (disable : 4127) // conditional expression is constant.
 #endif
-
+#include <boost/math/tools/config.hpp>
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // Boost.Test
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/math/distributions/weibull.hpp>
 using boost::math::weibull_distribution;
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include "test_out_of_range.hpp"
 #include <iostream>
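The Weibull CDF evaluated in the following .cu tests has the closed form F(x; k, λ) = 1 − exp(−(x/λ)^k), and with shape k = 1 (the weibull_distribution<float_type>(1) used by the kernels) it reduces to the exponential CDF 1 − e^(−x). A quick host-side sanity check of that identity (a sketch):

    #include <boost/math/distributions/weibull.hpp>
    #include <cassert>
    #include <cmath>

    int main()
    {
        // shape k = 1, default scale 1: Weibull reduces to Exponential(1).
        boost::math::weibull_distribution<double> w(1.0);
        const double x = 0.5;
        assert(std::abs(cdf(w, x) - (1.0 - std::exp(-x))) < 1e-15);
    }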
diff --git a/test/test_weibull_cdf_double.cu b/test/test_weibull_cdf_double.cu
new file mode 100644
index 0000000000..1b2e5cf0db
--- /dev/null
+++ b/test/test_weibull_cdf_double.cu
@@ -0,0 +1,109 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the test CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::weibull_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_weibull_cdf_float.cu b/test/test_weibull_cdf_float.cu
new file mode 100644
index 0000000000..76bf3a4e1c
--- /dev/null
+++ b/test/test_weibull_cdf_float.cu
@@ -0,0 +1,109 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the test CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::weibull_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
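A note on the launch configuration every test above and below uses: `(numElements + threadsPerBlock - 1) / threadsPerBlock` is integer ceiling division, so the last, possibly partial, block is still scheduled, and the `i < numElements` guard in the kernel masks off the excess threads. A minimal host-only sketch (not part of this patch; the helper name `blocks_for` is purely illustrative):

    // Ceiling-division grid sizing, as used by the kernel launches in this patch.
    #include <cassert>

    int blocks_for(int numElements, int threadsPerBlock)
    {
        return (numElements + threadsPerBlock - 1) / threadsPerBlock;
    }

    int main()
    {
        assert(blocks_for(50000, 256) == 196); // 196 * 256 = 50176 >= 50000
        assert(blocks_for(50176, 256) == 196); // exact multiple: no extra block
        assert(blocks_for(50177, 256) == 197); // one element over: one more block
    }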
diff --git a/test/test_weibull_cdf_nvrtc_double.cpp b/test/test_weibull_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..60d5ff5afb
--- /dev/null
+++ b/test/test_weibull_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <string>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/weibull.hpp>
+extern "C" __global__
+void test_weibull_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_weibull_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_weibull_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_weibull_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::weibull_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_weibull_cdf_nvrtc_float.cpp b/test/test_weibull_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..5085b2f7dd
--- /dev/null
+++ b/test/test_weibull_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <string>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/weibull.hpp>
+extern "C" __global__
+void test_weibull_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_weibull_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_weibull_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_weibull_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::weibull_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
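The pass/fail thresholds above (`> 100.0` in the runtime tests, `> 300` in the NVRTC tests) are expressed in multiples of machine epsilon via boost::math::epsilon_difference. A rough host-only sketch of that measure (not part of this patch, and deliberately ignoring the zero/infinity/denorm special cases the real boost::math::relative_difference handles):

    // Illustrative only: disagreement between two values in units of epsilon.
    #include <algorithm>
    #include <cmath>
    #include <iostream>
    #include <limits>

    template <typename T>
    T eps_difference_sketch(T a, T b)
    {
        const T rel = std::fabs(a - b) / (std::min)(std::fabs(a), std::fabs(b));
        return rel / std::numeric_limits<T>::epsilon(); // "how many epsilons apart"
    }

    int main()
    {
        const double serial   = 0.632120558828558; // stand-in CPU reference value
        const double parallel = serial * (1.0 + 50 * std::numeric_limits<double>::epsilon());
        std::cout << eps_difference_sketch(serial, parallel) << '\n'; // ~50, under the 100 threshold
    }

Read this way, the checks assert that GPU and CPU results agree to within 100 (or 300) epsilon-scale units, rather than to an absolute tolerance.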
diff --git a/test/test_weibull_pdf_double.cu b/test/test_weibull_pdf_double.cu
new file mode 100644
index 0000000000..dd48b57d60
--- /dev/null
+++ b/test/test_weibull_pdf_double.cu
@@ -0,0 +1,109 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the test CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::weibull_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_weibull_pdf_float.cu b/test/test_weibull_pdf_float.cu
new file mode 100644
index 0000000000..40064b1ed7
--- /dev/null
+++ b/test/test_weibull_pdf_float.cu
@@ -0,0 +1,109 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the test CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::weibull_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_weibull_pdf_nvrtc_double.cpp b/test/test_weibull_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..2e5e237b20
--- /dev/null
+++ b/test/test_weibull_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <string>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/weibull.hpp>
+extern "C" __global__
+void test_weibull_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_weibull_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_weibull_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_weibull_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::weibull_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_weibull_pdf_nvrtc_float.cpp b/test/test_weibull_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..6c3c5202c1
--- /dev/null
+++ b/test/test_weibull_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <string>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/weibull.hpp>
+extern "C" __global__
+void test_weibull_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_weibull_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_weibull_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_weibull_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::weibull_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_weibull_quan_double.cu b/test/test_weibull_quan_double.cu
new file mode 100644
index 0000000000..9263fb5365
--- /dev/null
+++ b/test/test_weibull_quan_double.cu
@@ -0,0 +1,109 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the test CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::weibull_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_weibull_quan_float.cu b/test/test_weibull_quan_float.cu
new file mode 100644
index 0000000000..5dd6bd6eef
--- /dev/null
+++ b/test/test_weibull_quan_float.cu
@@ -0,0 +1,109 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the test CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::weibull_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
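Because the quantile kernels use shape k = 1 with the default scale, the value being computed on the device reduces to the exponential quantile -ln(1 - p). A host-only sanity check (not part of this patch) that makes the reduction concrete:

    // Weibull(shape = 1, scale = 1) quantile equals the unit exponential quantile.
    #include <boost/math/distributions/weibull.hpp>
    #include <cassert>
    #include <cmath>

    int main()
    {
        const boost::math::weibull_distribution<double> w(1);
        for (double p : {0.1, 0.5, 0.9})
        {
            // -log1p(-p) == -ln(1 - p), computed accurately for small p
            assert(std::fabs(quantile(w, p) - (-std::log1p(-p))) < 1e-12);
        }
    }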
diff --git a/test/test_weibull_quan_nvrtc_double.cpp b/test/test_weibull_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..aed31865e8
--- /dev/null
+++ b/test/test_weibull_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <string>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/weibull.hpp>
+extern "C" __global__
+void test_weibull_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_weibull_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_weibull_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_weibull_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::weibull_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_weibull_quan_nvrtc_float.cpp b/test/test_weibull_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..98997b354b
--- /dev/null
+++ b/test/test_weibull_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <string>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/weibull.hpp>
+extern "C" __global__
+void test_weibull_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_weibull_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_weibull_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_weibull_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::weibull_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
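Every test file above defines BOOST_MATH_OVERFLOW_ERROR_POLICY as ignore_error before any Boost.Math include, presumably because device code cannot throw, so overflows have to come back as infinities rather than exceptions. A host-side sketch of the same effect applied per-distribution via the policy API instead of the macro (illustrative only; based on my reading of the Boost.Math policy docs, not on anything in this patch):

    // Under ignore_error, quantile at p == 1 returns +infinity instead of
    // throwing; the default policy would raise an overflow error here.
    #include <boost/math/distributions/weibull.hpp>
    #include <boost/math/policies/policy.hpp>

    using ignore_overflow = boost::math::policies::policy<
        boost::math::policies::overflow_error<boost::math::policies::ignore_error>>;

    int main()
    {
        boost::math::weibull_distribution<double, ignore_overflow> w(1);
        return quantile(w, 1.0) > 0 ? 0 : 1; // +inf, no exception thrown
    }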
      <include>../include_private
+     <library>/boost/multiprecision//boost_multiprecision
+     <library>/boost/algorithm//boost_algorithm
    ;

-lib gmp ;
-lib mpfr ;
-lib mpfi ;
-lib quadmath ;
+searched-lib gmp : : shared ;
+searched-lib mpfr : : shared ;
+searched-lib mpfi : : shared ;
+searched-lib quadmath : : shared ;

 exe bessel_data : bessel_data.cpp :
 [ check-target-builds ../config//is_ci_standalone_run : <build>no ] ;
 install bessel_data_install : bessel_data : bin ;

-exe ellint_f_data : ellint_f_data.cpp ;
+exe ellint_f_data : ellint_f_data.cpp /boost/test//included ;
 install ellint_f_data_install : ellint_f_data : bin ;

-exe heuman_lambda_data : heuman_lambda_data.cpp ;
+exe heuman_lambda_data : heuman_lambda_data.cpp /boost/test//included ;
 install heuman_lambda_data_install : heuman_lambda_data : bin ;

 exe hyp_2f2_data : hyp_2f2_data.cpp ;
@@ -56,24 +58,24 @@ install hyp_2f2_data_install : hyp_2f2_data : bin ;
 exe laguerre_data : laguerre_data.cpp ;
 install laguerre_data_install : laguerre_data : bin ;

-exe bessel_derivative_data : bessel_derivative_data.cpp :
-[ check-target-builds ../../multiprecision/config//has_gmp : <library>gmp : <build>no ]
-[ check-target-builds ../../multiprecision/config//has_mpfr : <library>mpfr : <build>no ]
+exe bessel_derivative_data : bessel_derivative_data.cpp /boost/test//included :
+[ check-target-builds /boost/multiprecision/config//has_gmp : <library>gmp : <build>no ]
+[ check-target-builds /boost/multiprecision/config//has_mpfr : <library>mpfr : <build>no ]
 [ check-target-builds ../config//is_ci_standalone_run : <build>no ] ;
 install bessel_derivative_data_install : bessel_derivative_data : bin ;

-exe ellint_k_data : ellint_k_data.cpp ;
+exe ellint_k_data : ellint_k_data.cpp /boost/test//included ;
 install ellint_k_data_install : ellint_k_data : bin ;

 exe hyp_0f2_data : hyp_0f2_data.cpp ;
 install hyp_0f2_data_install : hyp_0f2_data : bin ;

-exe hypergeometric_dist_data : hypergeometric_dist_data.cpp : 
-[ check-target-builds ../config//is_ci_standalone_run : <build>no ] 
+exe hypergeometric_dist_data : hypergeometric_dist_data.cpp :
+[ check-target-builds ../config//is_ci_standalone_run : <build>no ]
 [ requires cxx11_hdr_random ] ;
 install hypergeometric_dist_data_install : hypergeometric_dist_data : bin ;

-exe legendre_data : legendre_data.cpp : 
+exe legendre_data : legendre_data.cpp :
 [ check-target-builds ../config//is_ci_standalone_run : <build>no ] ;
 install legendre_data_install : legendre_data : bin ;

@@ -81,13 +83,13 @@ exe beta_data : beta_data.cpp :
 [ check-target-builds ../config//is_ci_standalone_run : <build>no ] ;
 install beta_data_install : beta_data : bin ;

-exe ellint_pi2_data : ellint_pi2_data.cpp ;
+exe ellint_pi2_data : ellint_pi2_data.cpp /boost/test//included ;
 install ellint_pi2_data_install : ellint_pi2_data : bin ;

-exe hyp_1f1_big_data : hyp_1f1_big_data.cpp : 
-[ check-target-builds ../../multiprecision/config//has_gmp : <library>gmp : <build>no ]
-[ check-target-builds ../../multiprecision/config//has_mpfr : <library>mpfr : <build>no ]
-[ check-target-builds ../../multiprecision/config//has_mpfi : <library>gmp <library>mpfr <library>mpfi : <build>no ]
+exe hyp_1f1_big_data : hyp_1f1_big_data.cpp :
+[ check-target-builds /boost/multiprecision/config//has_gmp : <library>gmp : <build>no ]
+[ check-target-builds /boost/multiprecision/config//has_mpfr : <library>mpfr : <build>no ]
+[ check-target-builds /boost/multiprecision/config//has_mpfi : <library>gmp <library>mpfr <library>mpfi : <build>no ]
 [ requires cxx11_decltype ] ;
 install hyp_1f1_big_data_install : hyp_1f1_big_data : bin ;

@@ -98,68 +100,68 @@ install ibeta_data_install : ibeta_data : bin ;
 exe log1p_expm1_data : log1p_expm1_data.cpp ;
 install log1p_expm1_data_install : log1p_expm1_data : bin ;

-exe carlson_ellint_data : carlson_ellint_data.cpp ;
+exe carlson_ellint_data : carlson_ellint_data.cpp /boost/test//included ;
 install carlson_ellint_data_install : carlson_ellint_data : bin ;

-exe ellint_pi3_data : ellint_pi3_data.cpp ;
+exe ellint_pi3_data : ellint_pi3_data.cpp /boost/test//included ;
 install ellint_pi3_data_install : ellint_pi3_data : bin ;

-exe hyp_1f1_data : hyp_1f1_data.cpp : 
-[ check-target-builds ../../multiprecision/config//has_gmp : <library>gmp : <build>no ]
-[ check-target-builds ../../multiprecision/config//has_mpfr : <library>mpfr : <build>no ]
-[ check-target-builds ../../multiprecision/config//has_mpfi : <library>gmp <library>mpfr <library>mpfi : <build>no ]
+exe hyp_1f1_data : hyp_1f1_data.cpp :
+[ check-target-builds /boost/multiprecision/config//has_gmp : <library>gmp : <build>no ]
+[ check-target-builds /boost/multiprecision/config//has_mpfr : <library>mpfr : <build>no ]
+[ check-target-builds /boost/multiprecision/config//has_mpfi : <library>gmp <library>mpfr <library>mpfi : <build>no ]
 [ requires cxx11_decltype ] ;
 install hyp_1f1_data_install : hyp_1f1_data : bin ;

-exe ibeta_derivative_data : ibeta_derivative_data.cpp : 
+exe ibeta_derivative_data : ibeta_derivative_data.cpp /boost/math//testing :
 [ check-target-builds ../config//is_ci_standalone_run : <build>no ] ;
 install ibeta_derivative_data_install : ibeta_derivative_data : bin ;

-exe sinc_data : sinc_data.cpp ;
+exe sinc_data : sinc_data.cpp /boost/test//included ;
 install sinc_data_install : sinc_data : bin ;

 exe cbrt_data : cbrt_data.cpp ;
 install cbrt_data_install : cbrt_data : bin ;

-exe erf_data : erf_data.cpp : [ check-target-builds ../../multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
+exe erf_data : erf_data.cpp : [ check-target-builds /boost/multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
 install erf_data_install : erf_data : bin ;

-exe hyp_1f1_log_big_data : hyp_1f1_log_big_data.cpp : 
-[ check-target-builds ../../multiprecision/config//has_gmp : <library>gmp : <build>no ]
-[ check-target-builds ../../multiprecision/config//has_mpfr : <library>mpfr : <build>no ]
-[ check-target-builds ../../multiprecision/config//has_mpfi : <library>gmp <library>mpfr <library>mpfi : <build>no ]
+exe hyp_1f1_log_big_data : hyp_1f1_log_big_data.cpp :
+[ check-target-builds /boost/multiprecision/config//has_gmp : <library>gmp : <build>no ]
+[ check-target-builds /boost/multiprecision/config//has_mpfr : <library>mpfr : <build>no ]
+[ check-target-builds /boost/multiprecision/config//has_mpfi : <library>gmp <library>mpfr <library>mpfi : <build>no ]
 [ requires cxx11_decltype ] ;
 install hyp_1f1_log_big_data_install : hyp_1f1_log_big_data : bin ;

-exe ibeta_inv_data : ibeta_inv_data.cpp : [ check-target-builds ../../multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
+exe ibeta_inv_data : ibeta_inv_data.cpp : [ check-target-builds /boost/multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
 install ibeta_inv_data_install : ibeta_inv_data : bin ;

 exe spherical_harmonic_data : spherical_harmonic_data.cpp :
 [ check-target-builds ../config//is_ci_standalone_run : <build>no ] ;
 install spherical_harmonic_data_install : spherical_harmonic_data : bin ;

-exe digamma_data : digamma_data.cpp : [ check-target-builds ../../multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
+exe digamma_data : digamma_data.cpp /boost/test//included : [ check-target-builds /boost/multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
 install digamma_data_install : digamma_data : bin ;

 exe expint_data : expint_data.cpp ;
 install expint_data_install : expint_data : bin ;

-exe hyp_1f1_reg_big_data : hyp_1f1_reg_big_data.cpp : 
-[ check-target-builds ../../multiprecision/config//has_gmp : <library>gmp : <build>no ]
-[ check-target-builds ../../multiprecision/config//has_mpfr : <library>mpfr : <build>no ]
-[ check-target-builds ../../multiprecision/config//has_mpfi : <library>gmp <library>mpfr <library>mpfi : <build>no ]
+exe hyp_1f1_reg_big_data : hyp_1f1_reg_big_data.cpp :
+[ check-target-builds /boost/multiprecision/config//has_gmp : <library>gmp : <build>no ]
+[ check-target-builds /boost/multiprecision/config//has_mpfr : <library>mpfr : <build>no ]
+[ check-target-builds /boost/multiprecision/config//has_mpfi : <library>gmp <library>mpfr <library>mpfi : <build>no ]
 [ requires cxx11_decltype ] ;
 install hyp_1f1_reg_big_data_install : hyp_1f1_reg_big_data : bin ;

-exe ibeta_invab_data : ibeta_invab_data.cpp : [ check-target-builds ../../multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
+exe ibeta_invab_data : ibeta_invab_data.cpp : [ check-target-builds /boost/multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
 install ibeta_invab_data_install : ibeta_invab_data : bin ;

-exe tgamma_large_data : tgamma_large_data.cpp : 
-[ check-target-builds ../../multiprecision/config//has_gmp : <library>gmp : <build>no ]
-[ check-target-builds ../../multiprecision/config//has_mpfr : <library>mpfr : <build>no ] ;
+exe tgamma_large_data : tgamma_large_data.cpp /boost/test//included :
+[ check-target-builds /boost/multiprecision/config//has_gmp : <library>gmp : <build>no ]
+[ check-target-builds /boost/multiprecision/config//has_mpfr : <library>mpfr : <build>no ] ;
 install tgamma_large_data_install : tgamma_large_data : bin ;

-exe ellint_d_data : ellint_d_data.cpp ;
+exe ellint_d_data : ellint_d_data.cpp /boost/test//included ;
 install ellint_d_data_install : ellint_d_data : bin ;

 exe expint_i_data : expint_i_data.cpp ;
@@ -168,17 +170,17 @@ install expint_i_data_install : expint_i_data : bin ;
 exe hyp_1f2_data : hyp_1f2_data.cpp ;
 install hyp_1f2_data_install : hyp_1f2_data : bin ;

-exe igamma_data : igamma_data.cpp : [ check-target-builds ../../multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
+exe igamma_data : igamma_data.cpp : [ check-target-builds /boost/multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
 install igamma_data_install : igamma_data : bin ;

-exe tgamma_ratio_data : tgamma_ratio_data.cpp : 
+exe tgamma_ratio_data : tgamma_ratio_data.cpp :
 [ check-target-builds ../config//is_ci_standalone_run : <build>no ] ;
 install tgamma_ratio_data_install : tgamma_ratio_data : bin ;

-exe ellint_d2_data : ellint_d2_data.cpp ;
+exe ellint_d2_data : ellint_d2_data.cpp /boost/test//included ;
 install ellint_d2_data_install : ellint_d2_data : bin ;

-exe gamma_P_inva_data : gamma_P_inva_data.cpp : [ check-target-builds ../../multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
+exe gamma_P_inva_data : gamma_P_inva_data.cpp : [ check-target-builds /boost/multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
 install gamma_P_inva_data_install : gamma_P_inva_data : bin ;

 exe hyp_2f0_data : hyp_2f0_data.cpp ;
@@ -190,7 +192,7 @@ install inv_hyp_data_install : inv_hyp_data : bin ;
 exe trig_data : trig_data.cpp ;
 install trig_data_install : trig_data : bin ;

-exe ellint_e_data : ellint_e_data.cpp ;
+exe ellint_e_data : ellint_e_data.cpp /boost/test//included ;
 install ellint_e_data_install : ellint_e_data : bin ;

 exe hermite_data : hermite_data.cpp ;
@@ -199,10 +201,10 @@ install hermite_data_install : hermite_data : bin ;
 exe hyp_2f1_data : hyp_2f1_data.cpp ;
 install hyp_2f1_data_install : hyp_2f1_data : bin ;

-exe jacobi_theta_data : jacobi_theta_data.cpp ;
+exe jacobi_theta_data : jacobi_theta_data.cpp /boost/test//included ;
 install jacobi_theta_data_install : jacobi_theta_data : bin ;

-exe jacobi_zeta_data : jacobi_zeta_data.cpp ;
+exe jacobi_zeta_data : jacobi_zeta_data.cpp /boost/test//included ;
 install jacobi_zeta_data_install : jacobi_zeta_data : bin ;

 exe zeta_data : zeta_data.cpp :
@@ -215,8 +217,8 @@ install generate_test_values_install : generate_test_values : bin ;
 exe igamma_temme_large_coef : igamma_temme_large_coef.cpp ;
 install igamma_temme_large_coef_install : igamma_temme_large_coef : bin ;

-exe lanczos_generator : lanczos_generator.cpp ../../chrono/build//boost_chrono ../../system/build//boost_system : 
-[ check-target-builds ../../multiprecision/config//has_float128 : <library>quadmath : <build>no ]
+exe lanczos_generator : lanczos_generator.cpp /boost/chrono//boost_chrono /boost/system//boost_system :
+[ check-target-builds /boost/multiprecision/config//has_float128 : <library>quadmath : <build>no ]
 [ requires cxx11_nullptr ] ;
 install lanczos_generator_install : lanczos_generator : bin ;

@@ -228,11 +230,11 @@ install generate_rational_test_install : generate_rational_test : bin

 #for local source in [ glob *_data.cpp ] generate_test_values.cpp igamma_temme_large_coef.cpp lanczos_generator.cpp factorial_tables.cpp generate_rational_test.cpp
 #{
-#   exe $(source:B) : $(source) : [ check-target-builds ../../multiprecision/config//has_gmp : <define>HAS_GMP <library>gmp : <build>no ] [ check-target-builds ../../multiprecision/config//has_mpfr : <define>HAS_MPFR <library>mpfr : <build>no ] [ check-target-builds ../../multiprecision/config//has_mpfi : <define>HAS_MPFI <library>gmp <library>mpfr <library>mpfi ] ;
+#   exe $(source:B) : $(source) : [ check-target-builds /boost/multiprecision/config//has_gmp : <define>HAS_GMP <library>gmp : <build>no ] [ check-target-builds /boost/multiprecision/config//has_mpfr : <define>HAS_MPFR <library>mpfr : <build>no ] [ check-target-builds /boost/multiprecision/config//has_mpfi : <define>HAS_MPFI <library>gmp <library>mpfr <library>mpfi ] ;
 #   install $(source:B)_bin : $(source:B) : bin ;
 #}

 exe generate_rational_code : generate_rational_code.cpp ;
-exe process_perf_results : process_perf_results.cpp ;
+exe process_perf_results : process_perf_results.cpp /boost/format//boost_format ;

 install bin : generate_rational_code process_perf_results ;
diff --git a/tools/generate_rational_code.cpp b/tools/generate_rational_code.cpp
index 2da7e000ea..20ffde8503 100644
--- a/tools/generate_rational_code.cpp
+++ b/tools/generate_rational_code.cpp
@@ -40,13 +40,13 @@ void print_polynomials(int max_order)
       "#define BOOST_MATH_TOOLS_POLY_EVAL_" << i << "_HPP\n\n"
       "namespace boost{ namespace math{ namespace tools{ namespace detail{\n\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*)\n"
       "{\n"
       "   return static_cast<V>(0);\n"
       "}\n"
       "\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*)\n"
       "{\n"
       "   return static_cast<V>(a[0]);\n"
       "}\n\n";
@@ -55,7 +55,7 @@ void print_polynomials(int max_order)
    {
      ofs <<
        "template <class T, class V>\n"
-       "inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, " << order << ">*)\n"
+       "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, " << order << ">*)\n"
        "{\n"
        "   return static_cast<V>((";
@@ -90,28 +90,28 @@ void print_polynomials(int max_order)
       "#define BOOST_MATH_TOOLS_POLY_EVAL_" << i << "_HPP\n\n"
       "namespace boost{ namespace math{ namespace tools{ namespace detail{\n\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*)\n"
       "{\n"
       "   return static_cast<V>(0);\n"
       "}\n"
       "\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*)\n"
       "{\n"
       "   return static_cast<V>(a[0]);\n"
       "}\n\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*)\n"
       "{\n"
       "   return static_cast<V>(a[1] * x + a[0]);\n"
       "}\n\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*)\n"
       "{\n"
       "   return static_cast<V>((a[2] * x + a[1]) * x + a[0]);\n"
       "}\n\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*)\n"
       "{\n"
       "   return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);\n"
       "}\n\n";
@@ -120,7 +120,7 @@ void print_polynomials(int max_order)
    {
      ofs <<
        "template <class T, class V>\n"
-       "inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, " << order << ">*)\n"
+       "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, " << order << ">*)\n"
        "{\n"
        "   V x2 = x * x;\n"
        "   return static_cast<V>((";
@@ -186,28 +186,28 @@ void print_polynomials(int max_order)
       "#define BOOST_MATH_TOOLS_POLY_EVAL_" << i << "_HPP\n\n"
       "namespace boost{ namespace math{ namespace tools{ namespace detail{\n\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*)\n"
       "{\n"
       "   return static_cast<V>(0);\n"
       "}\n"
       "\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*)\n"
       "{\n"
       "   return static_cast<V>(a[0]);\n"
       "}\n\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*)\n"
       "{\n"
       "   return static_cast<V>(a[1] * x + a[0]);\n"
       "}\n\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*)\n"
       "{\n"
       "   return static_cast<V>((a[2] * x + a[1]) * x + a[0]);\n"
       "}\n\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*)\n"
       "{\n"
       "   return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);\n"
       "}\n\n";
@@ -216,7 +216,7 @@ void print_polynomials(int max_order)
    {
      ofs <<
        "template <class T, class V>\n"
-       "inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, " << order << ">*)\n"
+       "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, " << order << ">*)\n"
"{\n" " V x2 = x * x;\n" " V t[2];\n"; @@ -281,13 +281,13 @@ void print_rationals(int max_order) "#define BOOST_MATH_TOOLS_POLY_RAT_" << i << "_HPP\n\n" "namespace boost{ namespace math{ namespace tools{ namespace detail{\n\n" "template \n" - "inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*)\n" "{\n" " return static_cast(0);\n" "}\n" "\n" "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*)\n" "{\n" " return static_cast(a[0]) / static_cast(b[0]);\n" "}\n\n"; @@ -296,7 +296,7 @@ void print_rationals(int max_order) { ofs << "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*)\n" "{\n" " if((-1 <= x) && (x <= 1))\n" " return static_cast(("; @@ -361,28 +361,28 @@ void print_rationals(int max_order) "#define BOOST_MATH_TOOLS_RAT_EVAL_" << i << "_HPP\n\n" "namespace boost{ namespace math{ namespace tools{ namespace detail{\n\n" "template \n" - "inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*)\n" "{\n" " return static_cast(0);\n" "}\n" "\n" "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*)\n" "{\n" " return static_cast(a[0]) / static_cast(b[0]);\n" "}\n\n" "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*)\n" "{\n" " return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0]));\n" "}\n\n" "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*)\n" "{\n" " return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));\n" "}\n\n" "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*)\n" "{\n" " return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));\n" "}\n\n"; @@ -391,7 +391,7 @@ void print_rationals(int max_order) { ofs << "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*)\n" "{\n" " if((-1 <= x) && (x <= 1))\n {\n" " V x2 = x * x;\n" @@ -577,28 +577,28 @@ void print_rationals(int max_order) "#define 
BOOST_MATH_TOOLS_RAT_EVAL_" << i << "_HPP\n\n" "namespace boost{ namespace math{ namespace tools{ namespace detail{\n\n" "template \n" - "inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*)\n" "{\n" " return static_cast(0);\n" "}\n" "\n" "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*)\n" "{\n" " return static_cast(a[0]) / static_cast(b[0]);\n" "}\n\n" "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*)\n" "{\n" " return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0]));\n" "}\n\n" "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*)\n" "{\n" " return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));\n" "}\n\n" "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*)\n" "{\n" " return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));\n" "}\n\n"; @@ -607,7 +607,7 @@ void print_rationals(int max_order) { ofs << "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*)\n" "{\n" " if((-1 <= x) && (x <= 1))\n {\n" " V x2 = x * x;\n" diff --git a/tools/ibeta_derivative_data.cpp b/tools/ibeta_derivative_data.cpp index f00fe46785..27d647410d 100644 --- a/tools/ibeta_derivative_data.cpp +++ b/tools/ibeta_derivative_data.cpp @@ -17,11 +17,11 @@ using namespace boost::math::tools; using namespace boost::math; using namespace std; -#include +#include #define T double #define SC_(x) static_cast(x) -#include +#include int main(int, char* []) { diff --git a/tools/nc_t_data.cpp b/tools/nc_t_data.cpp index 90b9e61558..ff3b994739 100644 --- a/tools/nc_t_data.cpp +++ b/tools/nc_t_data.cpp @@ -24,7 +24,7 @@ #include #include -#include +#include using namespace boost::math::tools; using namespace boost::math; @@ -91,7 +91,7 @@ int main(int, char* []) boost::math::quadrature::exp_sinh integrator(10); using T = float; -#include +#include for (unsigned i = 0; i < nct.size(); ++i) @@ -127,7 +127,7 @@ int main(int, char* []) std::cout << cdf << "), SC_(" << ccdf << ") }}," << std::endl; } -#include +#include for (unsigned i = 0; i < nct_small_delta.size(); ++i) { big_t error1, error2;